Wednesday, July 20, 2016

Google interview: implement sort X and discuss its space/time complexity

The problem with sort algorithms is there are many tricks to making them quicker. These tricks also make it more complex to do in an interview.

I have failed a google interview in the past because of this, at that time my interviewer asked me to code a *stable* quick sort on the white board.. The moment it occurred to just how tricky the code will be to do on a white board i simply froze up and promptly forgot everything.. I went down the rabbit hole so to speak, worst interview ever..

So i have tried to keep the algorithms as simple as possible. My tactic here is to break them into an to easy to remember set of sub operations. They are not optimal versions.

#include <iostream>
#include <vector>
#include <functional>

// ***********************************************
// ************** ITERATIVE SORTS ****************
// ***********************************************

void insertionSort(std::vector<int>& list)
{
    // ok insertion sort takes current object a inserts it in the correct place
    // inplace (excluding swap memory)
    // stable ordering
    //  -- because the item only moves if less than.. equals stop the inner loop
    // average case: O(n^2)
    // -- because it has to do some percent of the inner loop lets say 0.5..
    //     hence 0.5*N*N is still O(N^2)
    // best case: O(n)
    //  -- sorted and nothing moves
    // worst case: O(n^2)
    //  -- reversed and everything moves

    for (int i = 1 ; i < list.size(); ++i)
    {
        // now swap item down until its in place
        int j = i;
        while (j > 0 and list[j] < list[j-1])
        {
            int temp = list[j];
            list[j] = list[j-1];
            list[j-1] = temp;
            --j;
        }
    }
}

void selectionSort(std::vector<int>& list)
{
    // selection sort finds the correct object and for the current place
    //
    // inplace
    // stable ordering
    //  -- because the first of of equal items is selected
    // average case: O(n^2)
    // -- same reason as best
    // best case: O(n^2)
    //  -- even sorted u have to check all items left over
    // worst case: O(n^2)
    //  -- everything moves

    for (int i = 0 ; i < list.size(); ++i)
    {
        int selection = i;
        for (int j = i; j < list.size(); ++j)
        {
            if (list[selection] > list[j])
                selection = j;
        }

        if (selection != i)
        {
            int temp = list[i];
            list[i] = list[selection];
            list[selection] = temp;
        }
    }
}

// ***********************************************
// *********** DIVIDE AND CONQUER SORTS **********
// ***********************************************

void swap(std::vector<int>::iterator a,
          std::vector<int>::iterator b)
{
    int temp = *a;
    *a = *b;
    *b = temp;
}

void quickSort(std::vector<int>::iterator begin,
               std::vector<int>::iterator end)
{
    // ok divide and conquer by partition on a pivot
    // inplace
    // unstable
    //  -- because pivot moves out of order and then we swap left end woith right end
    // average case O(nlogn)
    // -- we linear scan the array and swap, with hopefully leaves a 50/50
    //    split and recurse for log(n) deep
    // best case O(nlogn)
    // -- still have to scan for the swaps..
    // worst case O(n^2)
    // -- if the pivot value sucks it will split all 2 one side

    // the range is [begin, end)..
    if ((end - begin) < 2) return;

    // choose pivot.. lets say midway...
    std::vector<int>::iterator p = begin + (end - begin)/2;
    int pivot = *(p);

    std::vector<int>::iterator head = begin;
    std::vector<int>::iterator tail = end - 1;

    // swap partition out of the way - If you dont remove a value then *when*
    // partitioning fails you end up stuck in an infinite loop
    swap(p, tail);
    --tail;

    // partition
    while (head != tail)
    {
        if (*head <= pivot)
            // head is in right place move head up
            ++head;
        else
        {
            // swivel head to tail and move tail down
            swap(head, tail);
            --tail;
        }
    }

    // check last item againest the pivot
    if (*head <= pivot) ++head;

    // swap the pivot back to the middle.
    swap(head, end-1);

    // note carefully this is infinite loop avoidance...  the head now points
    // at the pivot so exclude that from recursion
    quickSort(begin, head);   // Note  [begin, head)
    quickSort(head+1, end);   // Note         (head, end)
}

std::vector<int>::iterator rotate(std::vector<int>::iterator begin,
                                  std::vector<int>::iterator mid,
                                  std::vector<int>::iterator end)
{
    std::vector<int>::iterator p1 = begin;
    std::vector<int>::iterator p2 = mid;

    while (1)
    {
        swap(p1,p2);

        ++p1;
        ++p2;

        if (p1 >= mid and p2 == end) break;
        if (p2 == end) p2 = mid;
    }

    return begin + (end - mid);
}

std::vector<int>::iterator stablePartition(std::vector<int>::iterator cursor,
                                           std::vector<int>::iterator end,
                                           std::function<bool (int)>  op)
{
    // Note "op" will be true if its in the left side

    // move through the data until we find the end of the *first* left
    // section
    while (cursor < end and op(*cursor)) ++cursor;

    // record the start of the right section
    std::vector<int>::iterator rightStart = cursor;

    // find the end of the right section and the start of out of order
    // left section
    while (cursor < end and not op(*cursor)) ++cursor;

    while (cursor < end)
    {
        // if we are not at the end of the entire range theb the opFlase secton
        // has to be *out of place*

        // So record the end of the out of place right section
        std::vector<int>::iterator rightEnd = cursor;

        // find end of the out of place left section
        while (cursor < end and op(*cursor)) ++cursor;

        // then rotate the out of order left section in front of the right
        // section, and update the rightStart to the new divide point
        rightStart = rotate(rightStart, rightEnd, cursor);

        // then move the cursor to the new end of the right section, starting
        // from the end of the prior searched region
        while (cursor < end and not op(*cursor)) ++cursor;
    }

    // return the divide point between the left and rightsections
    return rightStart;
}


void stableQuickSort(std::vector<int>::iterator begin,
                     std::vector<int>::iterator end)
{
    // divide and conqure using *stable* partitioning of data
    // the trick with stable quick sort is that you rotate the sequances of
    // data to instead of swap the start and end to keep the data in order.
    //
    // It is a real pain..  and tricky to get right...  and would be very
    // difficult to do in an interview because partition failure avoidance is
    // much harder..  u need to break the sort range into 3 parts so u can leave
    // out the middle an avoid the infinite loop when partitioning fails
    //
    // AND NOTE BEST i have been asked this in a interview with google before.. (and i failed)
    //
    // inplace
    // stable
    //  -- the use of rotates moves the entire sequence keeping order
    //  -- however care must be taken so that pivot is also stable.  The way I
    //     see it there is only 1 real option you have to rotate and form 3
    //     sections the lessThan, equalTo and greaterThan sections. You can
    //     then avoid infinite loops on partition failures by excluding the
    //     equalTo section from the recurse
    // average case O(log(n) log(n) n)
    // -- http://www.codeproject.com/Articles/26048/Fastest-In-Place-Stable-Sort
    // worst case O(n^3) (i think)
    // -- if the pivoit value sucks it will split all 2 one side and do N
    //    recursions, if that partition is N and a worst will repeatedly call
    //    rotate which is worst case N then we have N*N*N or N^3

    // a list of less than 2 items is sorted by default
    if ((end - begin) < 2) return;

    // choose pivot.. mid point...
    std::vector<int>::iterator p = begin + (end - begin)/2;

    // record the pivot value..
    int pivot = *(p);

    // form the lessThan and greaterThanEqualTo sections
    std::vector<int>::iterator greaterThanEqualToStart
        = stablePartition(begin,
                          end,
                          [pivot](int v) { return v < pivot; });

    // form the equalTo and greaterThan sections
    std::vector<int>::iterator greaterThanStart
        = stablePartition(greaterThanEqualToStart,
                          end,
                          [pivot](int v) { return not (pivot < v); });

    // now recurse on the unhanded lessThan and greaterThan partitions *only*
    stableQuickSort(begin,            greaterThanEqualToStart);
    stableQuickSort(greaterThanStart, end);
}

void mergeSort(std::vector<int>::iterator begin,
               std::vector<int>::iterator end,
               std::vector<int>& tmp)
{
    // divide and conqure sort using *merging* of data
    //
    // stable
    //  -- because the left side merges in before right if the values are the same
    // average case O(nlogn)
    // -- we always divide in 2 and recurse for log(n) deep then do 2n in merge actions
    // best case O(nlogn)
    // -- same as average..
    // worst case O(nlogn)
    // -- same as best and average
    // worst space complexity O(n)
    // -- in theory u can grow tmp as u need to min the space..but practically
    //    this may result in issues because resizing a large vector temporally
    //    2x the space as it has to copy the old buffer to the new one, you can
    //    avoid this by allocating new pages.. but most of time that not what i
    //    see in practice

    if ((end - begin) < 2) return;

    // split
    std::vector<int>::iterator n = begin + (end - begin)/2;

    // recurse
    mergeSort(begin,n,tmp);
    mergeSort(n,end,tmp);

    //merge
    std::vector<int>::iterator left   = begin;
    std::vector<int>::iterator right  = n;
    std::vector<int>::iterator wcursor = tmp.begin();

    // stop the moment left runs out there is no need to copy all of right to tmp
    while ( left < n )
    {
        if      (right == end)   *(wcursor++) = *(left++ );
        else if (*right < *left) *(wcursor++) = *(right++);
        else                     *(wcursor++) = *(left++ );
    }

    // copy the section we merged into tmp back out (note this may not be the full length)
    std::vector<int>::iterator ccursor = tmp.begin();
    while(ccursor != wcursor)
    {
        *(begin++) = *(ccursor++);
    }
}


void inplaceMergeSort(std::vector<int>::iterator begin,
                      std::vector<int>::iterator end)
{
    // divide and conquer sort using *inplace* merging of data
    // the trick with inplace merge sort is that you rotate the sequences of
    // data to instead of copy then into the new buffer
    //
    // inplace
    //  -- the use of rotates swaps data multiple times to keep the in the same locations
    // stable
    // average case O(log(n) log(n) n)
    // -- http://www.codeproject.com/Articles/26048/Fastest-In-Place-Stable-Sort

    if ((end - begin) < 2) return;

    // split
    std::vector<int>::iterator n = begin + (end - begin)/2;

    // recurse
    inplaceMergeSort(begin,n);
    inplaceMergeSort(n,end);

    //merge
    std::vector<int>::iterator left  = begin;
    std::vector<int>::iterator right = n;
    while ( left < right)
    {
        // find rotate start ..
        while (not (*right < *left) and left < right) ++left;
        if (not (left < right)) break;

        // note rotate mid
        std::vector<int>::iterator mid = right;

        // find rotate end
        while (*right < *left and right < end) ++right;
        if (mid == right) break;

        // swap the sections of the array
        left = rotate(left,mid,right);
    }
}

// ***********************************************
// ****************** HEAP SORT ******************
// ***********************************************

std::vector<int>::iterator parent(std::vector<int>::iterator begin,
                                  std::vector<int>::iterator i)
{
    return begin + (i - begin - 1)/2;
}

std::vector<int>::iterator child(std::vector<int>::iterator begin,
                                 std::vector<int>::iterator i)
{
    return begin + 2 * (i - begin) + 1;
}

void upShift(std::vector<int>::iterator begin,
             std::vector<int>::iterator i)
{
    while(begin != i)
    {
        std::vector<int>::iterator p = parent(begin, i);
        if (*p > *i) return;
        swap(p,i);

        i = p;
    }
}

void makeHeap(std::vector<int>::iterator begin,
              std::vector<int>::iterator end)
{
    // version 1: top down O(nlogn)..
    if((end - begin) < 2) return;

    for(std::vector<int>::iterator i = begin + 1;
        i != end;
        ++i)
    {
        upShift(begin, i);
    }
}

void reheap(std::vector<int>::iterator begin,
            std::vector<int>::iterator end)
{
    std::vector<int>::iterator i = begin;
    while(1)
    {
        std::vector<int>::iterator kid1 = child(begin, i);
        std::vector<int>::iterator kid2 = kid1 + 1;

        if (not (kid1 < end)) return;

        std::vector<int>::iterator kidmax
            = ((kid2 < end) and (*kid1 < *kid2)) ? kid2 : kid1;

        if (not (*kidmax > *i)) return;

        swap(kidmax, i);
        i = kidmax;
    }
}

void heapSort(std::vector<int>::iterator begin,
              std::vector<int>::iterator end)
{
    // Heap sort, is the same as selection. Ie:  Find the correct item for the
    // current location but there is a critical difference, heap sort prepares
    // the unsorted data as a tree that forms a priority index with the
    // larger numbers the parents of children nodes
    //
    // inplace
    // unstable
    //   -- the heap operators dont work to keep stable order
    // best case: O(nlogn)  ...
    //  -- building a heap can be done as O(n) but this implementation doesn't use it

    if ((end - begin) < 2) return;

    // make the heap
    makeHeap(begin,end);

    //then starting at the back
    std::vector<int>::iterator i = end - 1;
    while (i != begin) // dont bother with the last it has to be the largest
    {
        // move the largest to the end
        swap(i, begin);

        // and repair the heap
        reheap(begin,i);
        --i;
    }
}

// ***********************************************
// ************** OVERLOAD WRAPPERS **************
// ***********************************************

// should be a real overload but std::functional cant seem to match the
// overload requested

void quickSortWrap(std::vector<int>& list)
{
    quickSort(list.begin(), list.end());
}

void stableQuickSortWrap(std::vector<int>& list)
{
    stableQuickSort(list.begin(), list.end());
}

void mergeSortWrap(std::vector<int>& list)
{
    std::vector<int> tmp;
    tmp.resize(list.size());

    mergeSort(list.begin(), list.end(), tmp);
}

void inplaceMergeSortWrap(std::vector<int>& list)
{
    inplaceMergeSort(list.begin(), list.end());
}

void heapSortWrap(std::vector<int>& list)
{
    heapSort(list.begin(), list.end());
}

// ***********************************************
// ******************** TESTING ******************
// ***********************************************

bool check(std::vector<int>& list)
{
    bool failed = false;
    bool first = true;
    int prev;

    for ( int i : list)
    {
        if (first)
        {
            prev = i;
            first = false;
        }
        else if (i < prev)
        {
            std::cout << "*";
            failed = true;
        }
        prev = i;
        std::cout << i << " ";
    }
    std::cout << " :" << (failed ? "FAILED" : "passed") << "\n";
}

int testHeap(std::vector<int> data)
{
    // check indexing
    if (data.size() > 1)
    {
        // some hard checks
        // index 1 -> parent 0
        if (data.size() > 1)
            std::cout << " parent1: "
                      << ((parent(data.begin(), data.begin()+1) == data.begin()) ?
                          "passed" : "FAILED")
                      << "\n";

        // index 2 -> parent 0
        if (data.size() > 2)
            std::cout << " parent2: "
                      << ((parent(data.begin(), data.begin()+2) == data.begin()) ?
                          "passed" : "FAILED")
                      << "\n";

        // index 3 -> parent 1
        if (data.size() > 3)
            std::cout << " parent3: "
                      << ((parent(data.begin(), data.begin()+3) == (data.begin()+1)) ?
                          "passed" : "FAILED")
                      << "\n";

        // index 4 -> parent 1
        if (data.size() > 4)
            std::cout << " parent4: "
                      << ((parent(data.begin(), data.begin()+4) == (data.begin()+1)) ?
                          "passed" : "FAILED")
                      << "\n";

        // the soft general check
        bool failed = false;
        for (std::vector<int>::iterator i = data.begin() + 1;
             i != data.end();
             ++i)
        {
            std::vector<int>::iterator p    = parent(data.begin(),i);
            std::vector<int>::iterator kid1 = child(data.begin(),p);
            std::vector<int>::iterator kid2 = kid1 + 1;

            bool bad = (kid1 != i) and (kid2 != i);
            if (bad)
                std::cout << " indexing bad at"
                          << " i:" << (i-data.begin())
                          << " p:" << (p-data.begin())
                          << " k1:" << (kid1-data.begin())
                          << " k2:" << (kid2-data.begin())
                          << "\n";

            failed |= bad;
        }
        std::cout << " general heap indexing:" << (failed ? "FAILED" : "passed") << "\n";
    }

    // check heap making
    makeHeap(data.begin(),
             data.end());

    bool failed = false;

    if (data.size() > 0)
        std::cout << " # " << data[0];

    for (int i = 1; i < data.size(); ++i)
    {
        int p = (i-1)/2;
        bool bad = (data[i] > data[p]);
        failed |= bad;
        std::cout << (bad ? "|" : " ") << data[i];
    }

    std::cout <<" " << (failed ? "FAILED" : "passed")
              << "\n";
}

int testRotate()
{
    std::vector<int> data({1,2,3,4,5,6,7,8});

    std::vector<int> data1(data);
    rotate(data1.begin(),
           data1.begin()+2,
           data1.end());

    for (std::vector<int>::iterator a = data1.begin();
         a != data1.end();
         ++a)
        std::cout << " " << *a;
    std::cout << "\n";

    std::cout << (data1 == std::vector<int>({3,4,5,6,7,8,1,2}) ? "passed" : "FAILED")
              << "\n";

    std::vector<int> data2(data);
    rotate(data2.begin(),
           data2.begin()+6,
           data2.end());

    for (std::vector<int>::iterator a = data2.begin();
         a != data2.end();
         ++a)
        std::cout << " " << *a;
    std::cout << "\n";

    std::cout << (data2 == std::vector<int>({7,8,1,2,3,4,5,6}) ? "passed" : "FAILED")
              << "\n";
}

int test_one(std::vector<int> data,
             std::function<void (std::vector<int>&)> sort)
{
    std::vector<int> list(data);
    sort(list);
    check(list);
}

void test(std::initializer_list<int> data)
{
    std::cout << "testing heap parts\n";
    testHeap(data);

    std::cout << "testing sorts\n";
    test_one(data,insertionSort);
    test_one(data,selectionSort);
    test_one(data,mergeSortWrap);
    test_one(data,inplaceMergeSortWrap);
    test_one(data,quickSortWrap);
    test_one(data,stableQuickSortWrap);
    test_one(data,heapSortWrap);
}

int main()
{
    std::cout << "testing rotate\n";
    testRotate();

    std::cout << "test checker\n";
    std::vector<int> list1({6,7,8,8,6,5});
    check(list1);

    std::cout << "testing sets\n";
    test({});
    test({1});
    test({1,1}); // sort breaker

    test({1,2});
    test({2,1});

    test({6,7,8,8,6,5});

    test({2,2,2,2,2,2,2,2,2,2,2,2,2}); // sort breaker...

    test({3,5,2,6,8,3,2,6,5,3,4,8,7});
}

No comments:

Post a Comment