Ran into tons of production issues yesterday…still lots of ongoing issue. So I am going to keep the recent posts short. This one is for sort algorithms.
Insert Sort 1: use binary search to find the insert point
def insert_sort(l): """ >>> l = [32, 64, 15, 38, 87, 13, 53, 12] >>> insert_sort(l) >>> print(l) [12, 13, 15, 32, 38, 53, 64, 87] >>> l = prog_1_4_gen_list_2(1 << 8, 99) >>> insert_sort(l) >>> test_verify_sorted(l) True """ # loop invariant: # i indicates l[0:i] is sorted and l[i] needs to be inserted # init: i = 1 # terminate: i = len(l) for i in range(1, len(l)): val = l[i] # find insert point insert_point = prog_2_A_binary_search(l[0:i], l[i], True) # right shift the l[insert_point:i] by 1 # loop invariant: # j indicates the current empty position, and l[j-1] needs to be shifted # init: j = i # termination: j = insert_point for j in range(i, insert_point, -1): l[j] = l[j - 1] # insert the value into the position l[insert_point] = val
Insert Sort 2: scan back while shifting
def insert_sort2(l): """ >>> l = [32, 64, 15, 38, 87, 13, 53, 12] >>> insert_sort2(l) >>> print(l) [12, 13, 15, 32, 38, 53, 64, 87] """ # loop invariant: # i indicates l[0:i] is sorted and l[i] needs to be inserted # init: i = 1 # termination: i = len(l) for i in range(1, len(l)): val = l[i] # find insert point, while shifting from left # loop invariant, j indicates: # - current empty position, and l[j-1] needs to be shifted # - l[j:i] contains all elements > val # - l[0:j] are to be scanned # termination: j == 0 (no l[j - 1]), or l[j - 1] > val j = i while j > 0 and val < l[j - 1]: l[j] = l[j - 1] j -= 1 # need to insert the value to the insert point l[j] = val
Heap Sort: heap_insert and heap_pop primitives
def heap_sort(l): """ >>> l = [106, 136, 124, 63, 76, 147, 92, 35] >>> heap_sort(l) >>> print(l) [35, 63, 76, 92, 106, 124, 136, 147] >>> l = prog_1_4_gen_list_2(1 << 8, 99) >>> heap_sort(l) >>> test_verify_sorted(l) True """ def heap_insert(l, end): # input contract: # - l[0:end] is already heap # - end is the new element to insert # output contact: # l[0:end+1] is heap # # process: loop to 'bubble up' the l[end] element. # invariant: # - p to indicate the current parent node to compare # - pos to indicate the current position of the new element # - terminal condition: p is out of scope, or l[p] is already bigger def parent(pos): return (pos-1) >> 1 assert(end >= 1) p = parent(end) pos = end val = l[pos] while p >= 0 and l[p] < val: l[p], l[pos] = l[pos], l[p] pos = p p = parent(pos) return def heap_pop(l, end): # input contract (assuming len = len(l)): # - l[0:end+1] is heap # output contract: # - l[0:end] is heap # - l[end] is the element that is pop up # # firstly, move the last element to be the new heap top; l[0], l[end] = l[end], l[0] # secondly, 'bubble down' the top element # loop invariant: # - n to indicate the current location of the element # termination: n is the leaf node, or n is already bigger def left_child(pos): return (n << 1) + 1 n = 0 val = l[n] left_child_n = left_child(n) while left_child_n < end: right_child_n = left_child_n + 1 # case 1: only left child exists if right_child_n >= end: if l[left_child_n] > val: l[left_child_n], l[n] = l[n], l[left_child_n] # maintain loop invariant n = left_child_n left_child_n = left_child(n) else: # termination condition: val is bigger than all its children break # case 2: both left and right exists else: if val >= l[right_child_n] and val >= l[left_child_n]: # termination condition: val is bigger than all its children break else: # need to swap n with the bigger one big_child_n = left_child_n if l[left_child_n] < l[right_child_n]: big_child_n = right_child_n l[big_child_n], l[n] = l[n], l[big_child_n] # maintain loop invariant n = big_child_n left_child_n = left_child(n) return # heap sort # step 1 - heaplify length = len(l) for i in range(1, len(l)): heap_insert(l, i) # step 2 - pop for i in range(0, len(l)): heap_pop(l, length - 1 - i) return
Quick Sort: loop invariant to help track the pivot
def qsort(l, left, right): """ >>> l = [32, 64, 15, 38, 87, 13, 53, 12] >>> qsort(l, 0, len(l) - 1) >>> qsort(l, 0, len(l) - 1) >>> print(l) [12, 13, 15, 32, 38, 53, 64, 87] >>> l = prog_1_4_gen_list_2(1 << 8, 99) >>> qsort(l, 0, len(l) - 1) >>> test_verify_sorted(l) True """ def partition(l, left, right): # choose random pivot position and swap it to the first one pivot_pos = random.randint(left, right) l[pivot_pos], l[left] = l[left], l[pivot_pos] pivot_pos = left pivot_val = l[left] # loop to move all the big numbers to the right side of pivot, # and all the small numbers to the left side # # invariants: # - [low, high] indicates the range where the numbers are not handled, # i.e. [left, low) contains all small numbers and (high, right] contains all big ones. # - pivot_pos indicates the pivot position, it should be equivalent to either low or high # - initially low, high are set to be left, right to indicate no numbers are handled # - termination condition: low = high. it also indicates the final pivot pos low, high = left, right while low != high: if pivot_pos == low: # compare at higher end because at lower there's nothing to compare if l[high] < pivot_val: # move small number to right, and increment low to indicate # the left handled range extends l[low], l[high] = l[high], l[low] pivot_pos = high low += 1 else: # big number already at left, decrease high to indicate the # right handled range extends high -= 1 elif pivot_pos == high: # compare at lower end if l[low] > pivot_val: l[low], l[high] = l[high], l[low] pivot_pos = low high -= 1 else: low += 1 else: # assert could not happen assert(false) return pivot_pos # corner case: when left == right, partition function will work and return the only # element as pivot. Then the logic won't be broken if left > right: return pivot_pos = partition(l, left, right) qsort(l, left, pivot_pos - 1) qsort(l, pivot_pos + 1, right) return