# [Fight] Sort Algorithms

Ran into tons of production issues yesterday…still lots of ongoing issue. So I am going to keep the recent posts short. This one is for sort algorithms.

Insert Sort 1: use binary search to find the insert point

```def insert_sort(l):
"""
>>> l = [32, 64, 15, 38, 87, 13, 53, 12]
>>> insert_sort(l)
>>> print(l)
[12, 13, 15, 32, 38, 53, 64, 87]
>>> l = prog_1_4_gen_list_2(1 << 8, 99)
>>> insert_sort(l)
>>> test_verify_sorted(l)
True
"""
# loop invariant:
# i indicates l[0:i] is sorted and l[i] needs to be inserted
# init: i = 1
# terminate: i = len(l)
for i in range(1, len(l)):
val = l[i]
# find insert point
insert_point = prog_2_A_binary_search(l[0:i], l[i], True)
# right shift the l[insert_point:i] by 1
# loop invariant:
# j indicates the current empty position, and l[j-1] needs to be shifted
# init: j = i
# termination: j = insert_point
for j in range(i, insert_point, -1):
l[j] = l[j - 1]
# insert the value into the position
l[insert_point] = val
```

Insert Sort 2: scan back while shifting

```def insert_sort2(l):
"""
>>> l = [32, 64, 15, 38, 87, 13, 53, 12]
>>> insert_sort2(l)
>>> print(l)
[12, 13, 15, 32, 38, 53, 64, 87]
"""
# loop invariant:
# i indicates l[0:i] is sorted and l[i] needs to be inserted
# init: i = 1
# termination: i = len(l)
for i in range(1, len(l)):
val = l[i]
# find insert point, while shifting from left
# loop invariant, j indicates:
# - current empty position, and l[j-1] needs to be shifted
# - l[j:i] contains all elements > val
# - l[0:j] are to be scanned
# termination: j == 0 (no l[j - 1]), or l[j - 1] > val
j = i
while j > 0 and val < l[j - 1]:
l[j] = l[j - 1]
j -= 1
# need to insert the value to the insert point
l[j] = val
```

Heap Sort: heap_insert and heap_pop primitives

```def heap_sort(l):
"""
>>> l = [106, 136, 124, 63, 76, 147, 92, 35]
>>> heap_sort(l)
>>> print(l)
[35, 63, 76, 92, 106, 124, 136, 147]
>>> l = prog_1_4_gen_list_2(1 << 8, 99)
>>> heap_sort(l)
>>> test_verify_sorted(l)
True
"""
def heap_insert(l, end):
# input contract:
# - l[0:end] is already heap
# - end is the new element to insert
# output contact:
# l[0:end+1] is heap
#
# process: loop to 'bubble up' the l[end] element.
# invariant:
# - p to indicate the current parent node to compare
# - pos to indicate the current position of the new element
# - terminal condition: p is out of scope, or l[p] is already bigger
def parent(pos):
return (pos-1) >> 1
assert(end >= 1)
p = parent(end)
pos = end
val = l[pos]
while p >= 0 and l[p] < val:
l[p], l[pos] = l[pos], l[p]
pos = p
p = parent(pos)
return
def heap_pop(l, end):
# input contract (assuming len = len(l)):
# - l[0:end+1] is heap
# output contract:
# - l[0:end] is heap
# - l[end] is the element that is pop up
#
# firstly, move the last element to be the new heap top;
l, l[end] = l[end], l
# secondly, 'bubble down' the top element
# loop invariant:
# - n to indicate the current location of the element
# termination: n is the leaf node, or n is already bigger
def left_child(pos):
return (n << 1) + 1
n = 0
val = l[n]
left_child_n = left_child(n)
while left_child_n < end:
right_child_n = left_child_n + 1
# case 1: only left child exists
if right_child_n >= end:
if l[left_child_n] > val:
l[left_child_n], l[n] = l[n], l[left_child_n]
# maintain loop invariant
n = left_child_n
left_child_n = left_child(n)
else:
# termination condition: val is bigger than all its children
break
# case 2: both left and right exists
else:
if val >= l[right_child_n] and val >= l[left_child_n]:
# termination condition: val is bigger than all its children
break
else:
# need to swap n with the bigger one
big_child_n = left_child_n
if l[left_child_n] < l[right_child_n]:
big_child_n = right_child_n
l[big_child_n], l[n] = l[n], l[big_child_n]
# maintain loop invariant
n = big_child_n
left_child_n = left_child(n)
return
# heap sort
# step 1 - heaplify
length = len(l)
for i in range(1, len(l)):
heap_insert(l, i)
# step 2 - pop
for i in range(0, len(l)):
heap_pop(l, length - 1 - i)
return
```

Quick Sort: loop invariant to help track the pivot

```def qsort(l, left, right):
"""
>>> l = [32, 64, 15, 38, 87, 13, 53, 12]
>>> qsort(l, 0, len(l) - 1)
>>> qsort(l, 0, len(l) - 1)
>>> print(l)
[12, 13, 15, 32, 38, 53, 64, 87]
>>> l = prog_1_4_gen_list_2(1 << 8, 99)
>>> qsort(l, 0, len(l) - 1)
>>> test_verify_sorted(l)
True
"""
def partition(l, left, right):
# choose random pivot position and swap it to the first one
pivot_pos = random.randint(left, right)
l[pivot_pos], l[left] = l[left], l[pivot_pos]
pivot_pos = left
pivot_val = l[left]
# loop to move all the big numbers to the right side of pivot,
# and all the small numbers to the left side
#
# invariants:
# - [low, high] indicates the range where the numbers are not handled,
#   i.e. [left, low) contains all small numbers and (high, right] contains all big ones.
# - pivot_pos indicates the pivot position, it should be equivalent to either low or high
# - initially low, high are set to be left, right to indicate no numbers are handled
# - termination condition: low = high. it also indicates the final pivot pos
low, high = left, right
while low != high:
if pivot_pos == low:
# compare at higher end because at lower there's nothing to compare
if l[high] < pivot_val:
# move small number to right, and increment low to indicate
# the left handled range extends
l[low], l[high] = l[high], l[low]
pivot_pos = high
low += 1
else:
# big number already at left, decrease high to indicate the
# right handled range extends
high -= 1
elif pivot_pos == high:
# compare at lower end
if l[low] > pivot_val:
l[low], l[high] = l[high], l[low]
pivot_pos = low
high -= 1
else:
low += 1
else:
# assert could not happen
assert(false)
return pivot_pos
# corner case: when left == right, partition function will work and return the only
# element as pivot. Then the logic won't be broken
if left > right:
return
pivot_pos = partition(l, left, right)
qsort(l, left, pivot_pos - 1)
qsort(l, pivot_pos + 1, right)
return
```

# [Programming Pearls] Column 4 – Writing Correct Programs

The column mainly describes the topic on how to write a correct program.

Writing a program
On writing programs, it illustrates a top-down way: write high-level descriptive (declarative) p-code; and then for each p-code statement, break it into next level with more details; continue the process until the program is composed.

While top-down way in general is effective especially when you are solving algorithm problems where problem can be clearly defined (at some level…). Paul Graham and other lisp hackers usually like to mention the way bottom-up. IMHO, the bottom-up way, is generally applied in cases where the problem you try to solve is too big (like for example, design a CAD software) and unclear at some point. For such kind of problems, it is hard to grasp or define the problem clearly. The bottom-up will start by exploring problem domain, defining domain primitives and ‘buzzword’ (like to to rotate a line, then you discovered there will be strong need on matrix calculation features), helping you to capture and really understand the domain, and then finally solve the problem while defining / discovering it (meaning, define a IT solution for given business requirement, mostly described by business / domain people).

That went too far so I’ll stop here…

Validate a program
Mostly it’s bottom up and is based on assertions on invariants. According to different program structures, the invariants vary. Like typically for a loop structure, the invariants can be divided into – init, preserve and termination. For a function, (contract-based) it is more on input params and return values (output params). Anyway, words do not mean anything so I’ll just give an example of an incorrect program where the error is actually caused because function invariable is not maintained. It’s a simple recursive based binary search.

```def prog_2_A_binary_search_recur_incorrect(l, a):
# termination
if len(l) == 0:
return -1
# binary search
mid = (len(l) - 1) >> 1
mid_val = l[mid]
if a > mid_val:
return prog_2_A_binary_search_recur(l[mid + 1 : ], a)
elif a == mid_val:
return mid
else:
return prog_2_A_binary_search_recur(l[ : mid], a)
```

The error is “mid” is actually related to current input l related to current recursive level, not the original l. So the fix is to add a ‘ref’ in there and maintain the offset like:

```def prog_2_A_binary_search_recur(l, a, ref = 0):
"""
>>> l = [1,20,34,45,76,87,90,100,103,298,500]
>>> print(prog_2_A_binary_search_recur(l, 298))
9
>>> print(prog_2_A_binary_search_recur(l, 999))
-1
"""
# termination
if len(l) == 0:
return -1
# binary search
mid = (len(l) - 1) >> 1
mid_val = l[mid]
if a > mid_val:
return prog_2_A_binary_search_recur(l[mid + 1 : ], a, ref + mid + 1)
elif a == mid_val:
return ref + mid
else:
return prog_2_A_binary_search_recur(l[ : mid], a, ref)
```

# [Programming Pearls] Column 1 – Non Dup Random Numbers

Source:
Problem 4

Problem:
Generate k random numbers with no duplicates

Solution 1:

```def prog_1_4_gen_list(max, count):
i = 0
v = prog_1_2_bitvec(max)
l = []
while (i < count):
num = random.randint(0, max - 1)
if not v.has(num):
v.set(num)
i += 1
l.append(num)
return l
```

Solution 2:
In each cycle of the loop:
- numbers[i + 1 :] is non-dup number pool
- numbers[i] will be a number that randomly chosen from number pool

```def prog_1_4_gen_list_2(max, count):
numbers = [n for n in range(0, max)]
for i in range(0, count):
randnum = random.randint(0, max - 1)
numbers[i], numbers[randnum] = numbers[randnum], numbers[i]
return numbers[: count]
```