Category Archives: Uncategorized

[Fight] Sort Algorithms

Ran into tons of production issues yesterday…still lots of ongoing issue. So I am going to keep the recent posts short. This one is for sort algorithms.

Insert Sort 1: use binary search to find the insert point

def insert_sort(l):
    """
    >>> l = [32, 64, 15, 38, 87, 13, 53, 12]
    >>> insert_sort(l)
    >>> print(l)
    [12, 13, 15, 32, 38, 53, 64, 87]
    >>> l = prog_1_4_gen_list_2(1 << 8, 99)
    >>> insert_sort(l)
    >>> test_verify_sorted(l)
    True
    """
    # loop invariant:
    # i indicates l[0:i] is sorted and l[i] needs to be inserted
    # init: i = 1
    # terminate: i = len(l)
    for i in range(1, len(l)):
        val = l[i]
        # find insert point
        insert_point = prog_2_A_binary_search(l[0:i], l[i], True)
        # right shift the l[insert_point:i] by 1
        # loop invariant:
        # j indicates the current empty position, and l[j-1] needs to be shifted
        # init: j = i
        # termination: j = insert_point
        for j in range(i, insert_point, -1):
            l[j] = l[j - 1]
        # insert the value into the position
        l[insert_point] = val

Insert Sort 2: scan back while shifting

def insert_sort2(l):
    """
    >>> l = [32, 64, 15, 38, 87, 13, 53, 12]
    >>> insert_sort2(l)
    >>> print(l)
    [12, 13, 15, 32, 38, 53, 64, 87]
    """
    # loop invariant:
    # i indicates l[0:i] is sorted and l[i] needs to be inserted
    # init: i = 1
    # termination: i = len(l)
    for i in range(1, len(l)):
        val = l[i]
        # find insert point, while shifting from left
        # loop invariant, j indicates:
        # - current empty position, and l[j-1] needs to be shifted
        # - l[j:i] contains all elements > val
        # - l[0:j] are to be scanned
        # termination: j == 0 (no l[j - 1]), or l[j - 1] > val
        j = i
        while j > 0 and val < l[j - 1]:
            l[j] = l[j - 1]
            j -= 1
        # need to insert the value to the insert point
        l[j] = val

Heap Sort: heap_insert and heap_pop primitives

def heap_sort(l):
    """
    >>> l = [106, 136, 124, 63, 76, 147, 92, 35]
    >>> heap_sort(l)
    >>> print(l)
    [35, 63, 76, 92, 106, 124, 136, 147]
    >>> l = prog_1_4_gen_list_2(1 << 8, 99)
    >>> heap_sort(l)
    >>> test_verify_sorted(l)
    True
    """
    def heap_insert(l, end):
        # input contract:
        # - l[0:end] is already heap
        # - end is the new element to insert
        # output contact:
        # l[0:end+1] is heap
        #
        # process: loop to 'bubble up' the l[end] element.
        # invariant:
        # - p to indicate the current parent node to compare
        # - pos to indicate the current position of the new element
        # - terminal condition: p is out of scope, or l[p] is already bigger
        def parent(pos):
            return (pos-1) >> 1
        assert(end >= 1)
        p = parent(end)
        pos = end
        val = l[pos]
        while p >= 0 and l[p] < val:
            l[p], l[pos] = l[pos], l[p]
            pos = p
            p = parent(pos)
        return
    def heap_pop(l, end):
        # input contract (assuming len = len(l)):
        # - l[0:end+1] is heap
        # output contract:
        # - l[0:end] is heap
        # - l[end] is the element that is pop up
        #
        # firstly, move the last element to be the new heap top;
        l[0], l[end] = l[end], l[0]
        # secondly, 'bubble down' the top element
        # loop invariant:
        # - n to indicate the current location of the element
        # termination: n is the leaf node, or n is already bigger
        def left_child(pos):
            return (n << 1) + 1
        n = 0
        val = l[n]
        left_child_n = left_child(n)
        while left_child_n < end:
            right_child_n = left_child_n + 1
            # case 1: only left child exists
            if right_child_n >= end:
                if l[left_child_n] > val:
                    l[left_child_n], l[n] = l[n], l[left_child_n]
                    # maintain loop invariant
                    n = left_child_n
                    left_child_n = left_child(n)
                else:
                    # termination condition: val is bigger than all its children
                    break
            # case 2: both left and right exists
            else:
                if val >= l[right_child_n] and val >= l[left_child_n]:
                    # termination condition: val is bigger than all its children
                    break
                else:
                    # need to swap n with the bigger one
                    big_child_n = left_child_n
                    if l[left_child_n] < l[right_child_n]:
                        big_child_n = right_child_n
                    l[big_child_n], l[n] = l[n], l[big_child_n]
                    # maintain loop invariant
                    n = big_child_n
                    left_child_n = left_child(n)
        return
    # heap sort
    # step 1 - heaplify
    length = len(l)
    for i in range(1, len(l)):
        heap_insert(l, i)
    # step 2 - pop
    for i in range(0, len(l)):
        heap_pop(l, length - 1 - i)
    return

Quick Sort: loop invariant to help track the pivot

def qsort(l, left, right):
    """
    >>> l = [32, 64, 15, 38, 87, 13, 53, 12]
    >>> qsort(l, 0, len(l) - 1)
    >>> qsort(l, 0, len(l) - 1)
    >>> print(l)
    [12, 13, 15, 32, 38, 53, 64, 87]
    >>> l = prog_1_4_gen_list_2(1 << 8, 99)
    >>> qsort(l, 0, len(l) - 1)
    >>> test_verify_sorted(l)
    True
    """
    def partition(l, left, right):
        # choose random pivot position and swap it to the first one
        pivot_pos = random.randint(left, right)
        l[pivot_pos], l[left] = l[left], l[pivot_pos]
        pivot_pos = left
        pivot_val = l[left]
        # loop to move all the big numbers to the right side of pivot,
        # and all the small numbers to the left side
        #
        # invariants:
        # - [low, high] indicates the range where the numbers are not handled,
        #   i.e. [left, low) contains all small numbers and (high, right] contains all big ones.
        # - pivot_pos indicates the pivot position, it should be equivalent to either low or high
        # - initially low, high are set to be left, right to indicate no numbers are handled
        # - termination condition: low = high. it also indicates the final pivot pos
        low, high = left, right
        while low != high:
            if pivot_pos == low:
                # compare at higher end because at lower there's nothing to compare
                if l[high] < pivot_val:
                    # move small number to right, and increment low to indicate
                    # the left handled range extends
                    l[low], l[high] = l[high], l[low]
                    pivot_pos = high
                    low += 1
                else:
                    # big number already at left, decrease high to indicate the
                    # right handled range extends
                    high -= 1
            elif pivot_pos == high:
                # compare at lower end
                if l[low] > pivot_val:
                    l[low], l[high] = l[high], l[low]
                    pivot_pos = low
                    high -= 1
                else:
                    low += 1
            else:
                # assert could not happen
                assert(false)
        return pivot_pos
    # corner case: when left == right, partition function will work and return the only
    # element as pivot. Then the logic won't be broken
    if left > right:
        return
    pivot_pos = partition(l, left, right)
    qsort(l, left, pivot_pos - 1)
    qsort(l, pivot_pos + 1, right)
    return

[Programming Pearls] Column 4 – Writing Correct Programs

The column mainly describes the topic on how to write a correct program.

Writing a program
On writing programs, it illustrates a top-down way: write high-level descriptive (declarative) p-code; and then for each p-code statement, break it into next level with more details; continue the process until the program is composed.

While top-down way in general is effective especially when you are solving algorithm problems where problem can be clearly defined (at some level…). Paul Graham and other lisp hackers usually like to mention the way bottom-up. IMHO, the bottom-up way, is generally applied in cases where the problem you try to solve is too big (like for example, design a CAD software) and unclear at some point. For such kind of problems, it is hard to grasp or define the problem clearly. The bottom-up will start by exploring problem domain, defining domain primitives and ‘buzzword’ (like to to rotate a line, then you discovered there will be strong need on matrix calculation features), helping you to capture and really understand the domain, and then finally solve the problem while defining / discovering it (meaning, define a IT solution for given business requirement, mostly described by business / domain people).

That went too far so I’ll stop here…

Validate a program
Mostly it’s bottom up and is based on assertions on invariants. According to different program structures, the invariants vary. Like typically for a loop structure, the invariants can be divided into – init, preserve and termination. For a function, (contract-based) it is more on input params and return values (output params). Anyway, words do not mean anything so I’ll just give an example of an incorrect program where the error is actually caused because function invariable is not maintained. It’s a simple recursive based binary search.

def prog_2_A_binary_search_recur_incorrect(l, a):
    # termination
    if len(l) == 0:
        return -1
    # binary search
    mid = (len(l) - 1) >> 1
    mid_val = l[mid]
    if a > mid_val:
        return prog_2_A_binary_search_recur(l[mid + 1 : ], a)
    elif a == mid_val:
        return mid
    else:
        return prog_2_A_binary_search_recur(l[ : mid], a)

The error is “mid” is actually related to current input l related to current recursive level, not the original l. So the fix is to add a ‘ref’ in there and maintain the offset like:

def prog_2_A_binary_search_recur(l, a, ref = 0):
    """
    >>> l = [1,20,34,45,76,87,90,100,103,298,500]
    >>> print(prog_2_A_binary_search_recur(l, 298))
    9
    >>> print(prog_2_A_binary_search_recur(l, 999))
    -1
    """
    # termination
    if len(l) == 0:
        return -1
    # binary search
    mid = (len(l) - 1) >> 1
    mid_val = l[mid]
    if a > mid_val:
        return prog_2_A_binary_search_recur(l[mid + 1 : ], a, ref + mid + 1)
    elif a == mid_val:
        return ref + mid
    else:
        return prog_2_A_binary_search_recur(l[ : mid], a, ref)

[Programming Pearls] Column 1 – Non Dup Random Numbers

Source:
Problem 4

Problem:
Generate k random numbers with no duplicates

Solution 1:

def prog_1_4_gen_list(max, count):
    # TODO: avoid dead loop....
    i = 0
    v = prog_1_2_bitvec(max)
    l = []
    while (i < count):
        num = random.randint(0, max - 1)
        if not v.has(num):
            v.set(num)
            i += 1
            l.append(num)
    return l

Solution 2:
In each cycle of the loop:
- numbers[i + 1 :] is non-dup number pool
- numbers[i] will be a number that randomly chosen from number pool

def prog_1_4_gen_list_2(max, count):
    numbers = [n for n in range(0, max)]
    for i in range(0, count):
        randnum = random.randint(0, max - 1)
        numbers[i], numbers[randnum] = numbers[randnum], numbers[i]
    return numbers[: count]