[Fight] Sort Algorithms

Ran into tons of production issues yesterday…still lots of ongoing issue. So I am going to keep the recent posts short. This one is for sort algorithms.

Insert Sort 1: use binary search to find the insert point

def insert_sort(l):
    """
    >>> l = [32, 64, 15, 38, 87, 13, 53, 12]
    >>> insert_sort(l)
    >>> print(l)
    [12, 13, 15, 32, 38, 53, 64, 87]
    >>> l = prog_1_4_gen_list_2(1 << 8, 99)
    >>> insert_sort(l)
    >>> test_verify_sorted(l)
    True
    """
    # loop invariant:
    # i indicates l[0:i] is sorted and l[i] needs to be inserted
    # init: i = 1
    # terminate: i = len(l)
    for i in range(1, len(l)):
        val = l[i]
        # find insert point
        insert_point = prog_2_A_binary_search(l[0:i], l[i], True)
        # right shift the l[insert_point:i] by 1
        # loop invariant:
        # j indicates the current empty position, and l[j-1] needs to be shifted
        # init: j = i
        # termination: j = insert_point
        for j in range(i, insert_point, -1):
            l[j] = l[j - 1]
        # insert the value into the position
        l[insert_point] = val

Insert Sort 2: scan back while shifting

def insert_sort2(l):
    """
    >>> l = [32, 64, 15, 38, 87, 13, 53, 12]
    >>> insert_sort2(l)
    >>> print(l)
    [12, 13, 15, 32, 38, 53, 64, 87]
    """
    # loop invariant:
    # i indicates l[0:i] is sorted and l[i] needs to be inserted
    # init: i = 1
    # termination: i = len(l)
    for i in range(1, len(l)):
        val = l[i]
        # find insert point, while shifting from left
        # loop invariant, j indicates:
        # - current empty position, and l[j-1] needs to be shifted
        # - l[j:i] contains all elements > val
        # - l[0:j] are to be scanned
        # termination: j == 0 (no l[j - 1]), or l[j - 1] > val
        j = i
        while j > 0 and val < l[j - 1]:
            l[j] = l[j - 1]
            j -= 1
        # need to insert the value to the insert point
        l[j] = val

Heap Sort: heap_insert and heap_pop primitives

def heap_sort(l):
    """
    >>> l = [106, 136, 124, 63, 76, 147, 92, 35]
    >>> heap_sort(l)
    >>> print(l)
    [35, 63, 76, 92, 106, 124, 136, 147]
    >>> l = prog_1_4_gen_list_2(1 << 8, 99)
    >>> heap_sort(l)
    >>> test_verify_sorted(l)
    True
    """
    def heap_insert(l, end):
        # input contract:
        # - l[0:end] is already heap
        # - end is the new element to insert
        # output contact:
        # l[0:end+1] is heap
        #
        # process: loop to 'bubble up' the l[end] element.
        # invariant:
        # - p to indicate the current parent node to compare
        # - pos to indicate the current position of the new element
        # - terminal condition: p is out of scope, or l[p] is already bigger
        def parent(pos):
            return (pos-1) >> 1
        assert(end >= 1)
        p = parent(end)
        pos = end
        val = l[pos]
        while p >= 0 and l[p] < val:
            l[p], l[pos] = l[pos], l[p]
            pos = p
            p = parent(pos)
        return
    def heap_pop(l, end):
        # input contract (assuming len = len(l)):
        # - l[0:end+1] is heap
        # output contract:
        # - l[0:end] is heap
        # - l[end] is the element that is pop up
        #
        # firstly, move the last element to be the new heap top;
        l[0], l[end] = l[end], l[0]
        # secondly, 'bubble down' the top element
        # loop invariant:
        # - n to indicate the current location of the element
        # termination: n is the leaf node, or n is already bigger
        def left_child(pos):
            return (n << 1) + 1
        n = 0
        val = l[n]
        left_child_n = left_child(n)
        while left_child_n < end:
            right_child_n = left_child_n + 1
            # case 1: only left child exists
            if right_child_n >= end:
                if l[left_child_n] > val:
                    l[left_child_n], l[n] = l[n], l[left_child_n]
                    # maintain loop invariant
                    n = left_child_n
                    left_child_n = left_child(n)
                else:
                    # termination condition: val is bigger than all its children
                    break
            # case 2: both left and right exists
            else:
                if val >= l[right_child_n] and val >= l[left_child_n]:
                    # termination condition: val is bigger than all its children
                    break
                else:
                    # need to swap n with the bigger one
                    big_child_n = left_child_n
                    if l[left_child_n] < l[right_child_n]:
                        big_child_n = right_child_n
                    l[big_child_n], l[n] = l[n], l[big_child_n]
                    # maintain loop invariant
                    n = big_child_n
                    left_child_n = left_child(n)
        return
    # heap sort
    # step 1 - heaplify
    length = len(l)
    for i in range(1, len(l)):
        heap_insert(l, i)
    # step 2 - pop
    for i in range(0, len(l)):
        heap_pop(l, length - 1 - i)
    return

Quick Sort: loop invariant to help track the pivot

def qsort(l, left, right):
    """
    >>> l = [32, 64, 15, 38, 87, 13, 53, 12]
    >>> qsort(l, 0, len(l) - 1)
    >>> qsort(l, 0, len(l) - 1)
    >>> print(l)
    [12, 13, 15, 32, 38, 53, 64, 87]
    >>> l = prog_1_4_gen_list_2(1 << 8, 99)
    >>> qsort(l, 0, len(l) - 1)
    >>> test_verify_sorted(l)
    True
    """
    def partition(l, left, right):
        # choose random pivot position and swap it to the first one
        pivot_pos = random.randint(left, right)
        l[pivot_pos], l[left] = l[left], l[pivot_pos]
        pivot_pos = left
        pivot_val = l[left]
        # loop to move all the big numbers to the right side of pivot,
        # and all the small numbers to the left side
        #
        # invariants:
        # - [low, high] indicates the range where the numbers are not handled,
        #   i.e. [left, low) contains all small numbers and (high, right] contains all big ones.
        # - pivot_pos indicates the pivot position, it should be equivalent to either low or high
        # - initially low, high are set to be left, right to indicate no numbers are handled
        # - termination condition: low = high. it also indicates the final pivot pos
        low, high = left, right
        while low != high:
            if pivot_pos == low:
                # compare at higher end because at lower there's nothing to compare
                if l[high] < pivot_val:
                    # move small number to right, and increment low to indicate
                    # the left handled range extends
                    l[low], l[high] = l[high], l[low]
                    pivot_pos = high
                    low += 1
                else:
                    # big number already at left, decrease high to indicate the
                    # right handled range extends
                    high -= 1
            elif pivot_pos == high:
                # compare at lower end
                if l[low] > pivot_val:
                    l[low], l[high] = l[high], l[low]
                    pivot_pos = low
                    high -= 1
                else:
                    low += 1
            else:
                # assert could not happen
                assert(false)
        return pivot_pos
    # corner case: when left == right, partition function will work and return the only
    # element as pivot. Then the logic won't be broken
    if left > right:
        return
    pivot_pos = partition(l, left, right)
    qsort(l, left, pivot_pos - 1)
    qsort(l, pivot_pos + 1, right)
    return

Leave a Reply

Your email address will not be published.

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>