## Saturday, August 13, 2016

### google interview: efficiently find a number in a partial sorted list

// http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer
// You are given a list of numbers.  When you reach the end of the list you
// will come back to the beginning of the list (a circular list).  Write the
// most efficient algorithm to find the minimum # in this list.  Find any given
// # in the list.  The numbers in the list are always increasing but you
// don’t know where the circular list begins, ie: 38, 40, 55, 89, 6, 13, 20,
// 23, 36.

// First of all note very carefully this is for a *unidirectional circular linked
// list* if we alter the design of the data structure there are much better ways
// to do this.. but i believe that is out of bounds as a potential answer.

// hmm..  essentially the find min number algorithm is a component of the find
// any number algorithm..  Since the question isn't written as find the min,
// then fix the list start and then find any # ill assume that it was meant as
// a helper step to get to the find algorithm..  so in the interest of getting
// it done lets just cut to the find any # algorithm and code a few evolutions
// showing efficiency improvements..

#include <iostream>
#include <vector>
#include <chrono>
#include <functional>

// ****************************************
// *************** CYCLIC LIST ************
// ****************************************

class List
{
std::vector<int> data_;
int offset_;

public:
class Iterator
{
List* list_;
int index_;

public:
Iterator() :
list_(NULL),
index_(-1)
{}

Iterator(List* list) :
list_(list),
index_(list->offset_)
{}

// WARNING DO NOT USE in find.. that was not spec'ed
// this is for testing only
Iterator(List* list, int offset) :
list_(list),
index_(offset)
{}

Iterator& operator++()
{
index_ = (index_ + 1) % list_->data_.size();
return *this;
}

Iterator operator+(int delta)
{
int offset = (index_ + delta) % list_->data_.size();
while (offset < 0) offset += list_->data_.size();

return Iterator(list_, offset);
}

// WARNING DO NOT UE in find.. that was not spec'ed
// this is for testing only
Iterator& operator--()
{
index_ = (list_->data_.size() + index_ - 1) % list_->data_.size();
return *this;
}

int operator*()
{
return list_->data_[index_];
}

bool operator==(const Iterator& other) const
{
return other.list_ == list_ and
other.index_ == index_;
}

bool operator!=(const Iterator& other) const
{
return not (other == *this);
}

bool valid() const
{
return list_ != NULL;
}
};

List(std::initializer_list<int> data,
int offset = 0) :
data_(data),
offset_(offset)
{}

List(int size) :
data_(size),
offset_(rand() % size)
{
// random fill
int value = 0;
for (int i = 0; i < size; ++i)
{
value += 2 + rand() % 5;
data_[i] = value;
}
}

bool empty() const
{
return data_.empty();
}

Iterator begin()
{
return Iterator(this);
}

Iterator random()
{
return Iterator(this, rand() % data_.size());
}
};

// ****************************************
// *************** FIND NAIVE *************
// ****************************************

List::Iterator find_naive(List& data, int target)
{
if (data.empty()) return List::Iterator();

List::Iterator start = data.begin();
List::Iterator i = start;

do
{
if (*i == target) return i;
++i;
}
while (i != start);

return List::Iterator();
}

// ****************************************
// ************* FIND NAIVE V2 ************
// ****************************************

// Attempt to simply the code a bit so that we handle the cyclic iteration and
// partial sorted issue cleaner for latter scaling

List::Iterator find_naive2(List& data, int target)
{
if (data.empty()) return  List::Iterator();

//termination is an issue because start *is* the end ..
// simplify this by checking the start..
List::Iterator end = data.begin();
if (target == *end) return end;

// then start at the first item after and now "start" is really the end
List::Iterator i = end + 1;

while (i != end and *i != target) ++i;
if (*i == target) return i;

return  List::Iterator();
}

// Of course you could try a divide and conquer idea..  but your dealing with a
// list that you want to divide in half.  Dividing it it half means you need to
// step the iterator the mid point..  So assuming you know the size of the list
// the total search on a fully sorted list will *always* cost u n/2 + n/4 + n/8
// ...  moves. Which totals to n-1 (ie if n=8 is 4+2+1=7)..  So the performance
// is actually worst on average for the naive version ... of course this might
// not be true because the cost of iterators might be very low compared to the
// compare.. but i dont think it is... so im skipping it..

// ****************************************
// ****************************************

// What we can really do is try to take advantage of the sorted order of data
// and terminate early..  In order to do that what you have to do is skip to
// the correct sorted part for your target and then search... (hence the find
// min number stuff before)

// Keep in mind this gets much less robust to problems with the data..  ie
// if the list isnt sort (and parted) before hand..

List::Iterator find_skip_start(List& data, int target)
{
// handle the empty dataset case..
if (data.empty()) return  List::Iterator();

//termination is an issue because start *is* the end ..
// simplify this by checking the start..
List::Iterator end = data.begin();
if (target == *end) return end;

// then start at the first item after and now "start" is really the end
List::Iterator i = end + 1;

// if the target is in the second part
if (target < *end)
{
int split_edge = *end;
while (*i >= split_edge and i != end) ++i;
}

// then search for the target
while (*i < target and i != end) ++i;

if (*i == target) return i;
return  List::Iterator();
}

// ****************************************
// ******* DOUBLE SKIP IN SECTION *********
// ****************************************

// Now because its sorted you dont have to check items every time you move
// forward.  You can step and then roll back if you went to far..  this makes
// some big assumptions about the cost of iterator movement vs integer
// compares.. my "List" class isnt great but it works out.. and in
// theory should cut out every second compare..

List::Iterator find_double_skip(List& data, int target)
{
// handle the empty dataset case..
if (data.empty()) return  List::Iterator();

//termination is an issue because start *is* the end ..
// simplify this by checking the start..
List::Iterator end = data.begin();
if (target == *end) return end;

// then start at the first item after and now "start" is really the end
List::Iterator i  = end + 1;

if (target < *end)
{
int split_edge = *end;

List::Iterator iprev;
while (*i >= split_edge and i != end)
{
++i;
iprev = i;
if (i != end) ++i;
}

// check if we need to back up the double step.
if (iprev.valid() and *iprev < split_edge) i = iprev;
}

{
// double skip till target found or exceeded
List::Iterator iprev;
while (*i < target and i != end)
{
++i;
iprev = i;
if (i != end) ++i;
}

// check if found the target at either of the pointers..
if (iprev.valid() and *iprev == target) return iprev;
if (*i == target) return i;
}

return  List::Iterator();
}

// ****************************************
// ********** N SKIP IN SECTION ***********
// ****************************************

// Optimization..  of course if u can do better with steps of 2 then u can
// generalize it to any number of steps..  for example using steps of 20..  now
// we are really playing balance games..  trading the speeds of various
// comparisons, assignments etc..  Then u have to tune the size of skip for

List::Iterator find_n_skip(List& data, int target)
{
const int skip = 20;

// handle the empty dataset case..
if (data.empty()) return  List::Iterator();

//termination is an issue because start *is* the end ..
// simplify this by checking the start..
List::Iterator end = data.begin();
if (target == *end) return end;

// then start at the first item after and now "start" is really the end
List::Iterator i  = end + 1;

if (target < *end)
{
List::Iterator iprev = i;
int split_edge = *end;
while (*i >= split_edge and i != end)
{
++i;
iprev = i;
int steps = skip;
while (--steps > 0 and i != end) ++i;
}

// when found return to the last known ok point and search forward 1 by 1
i = iprev;
while (*i >= split_edge and i != end) ++i;
}

{
List::Iterator iprev = i;
while (*i < target and i != end)
{
++i;
iprev = i;
int steps = skip;
while (--steps > 0 and i != end) ++i;
}

// when found return to the last known ok point and search forward 1 by 1
i = iprev;
while (*i < target and i != end) ++i;

if (*i == target) return i;
}

return  List::Iterator();
}

void test(std::string dataTag,
std::string algoTag,
std::function<List::Iterator (List& , int)> func,
List& data,
int target,
bool hasTarget)
{
auto start = std::chrono::system_clock::now();
List::Iterator result = func(data, target);
auto end   = std::chrono::system_clock::now();

std::cout << algoTag << " for " << dataTag << ": ";
if (not result.valid())
std::cout << (hasTarget ? "FAIL" : "pass");
else
std::cout << ((hasTarget and (target == *result)) ? "pass" : "FAIL");

std::cout << " -- time:"
<< std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
<< " nSec" << std::endl;
}

void testSet(std::string tag,
List& data,
int target,
bool hasTarget)
{
test(tag, "naive ",  find_naive,       data, target, hasTarget);
test(tag, "naive2",  find_naive2,      data, target, hasTarget);
test(tag, "skip  ",  find_skip_start,  data, target, hasTarget);
test(tag, "2xskip",  find_double_skip, data, target, hasTarget);
test(tag, "nskip ",  find_n_skip,      data, target, hasTarget);
std::cout << "\n";
}

int main()
{
srand(std::chrono::system_clock::now().time_since_epoch().count());

{
List list({10,11,12,1,2,3,4,5,6,7,8});
testSet("small     ", list, 4, true);
}

{
List list(1000000);
testSet("big rand  ", list, *list.random(), true);
testSet("big nrand ", list, *list.random()-1, false);  // note that  random always is +2 ~ +7 so -1 means i will not be in the data..

testSet("big early ", list, *(list.begin() + 10), true);    // i hope.. its random so it might not be..
testSet("big nearly", list, *(list.begin() + 10)-1, false); // check early exit in first half on fail

testSet("big last  ", list, *(--list.begin()), true);
testSet("big nlast ", list, -1, false);
}
}



naive  for small     : pass -- time:3207 nSec
naive2 for small     : pass -- time:2175 nSec
skip   for small     : pass -- time:1996 nSec
2xskip for small     : pass -- time:2151 nSec
nskip  for small     : pass -- time:4232 nSec

naive  for big rand  : pass -- time:14229998 nSec
naive2 for big rand  : pass -- time:13228995 nSec
skip   for big rand  : pass -- time:12786059 nSec
2xskip for big rand  : pass -- time:10874261 nSec
nskip  for big rand  : pass -- time:9004453 nSec

naive  for big nrand : pass -- time:43750054 nSec
naive2 for big nrand : pass -- time:37382249 nSec
skip   for big nrand : pass -- time:17939681 nSec
2xskip for big nrand : pass -- time:15509062 nSec
nskip  for big nrand : pass -- time:12927864 nSec

naive  for big early : pass -- time:738 nSec
naive2 for big early : pass -- time:611 nSec
skip   for big early : pass -- time:632 nSec
2xskip for big early : pass -- time:708 nSec
nskip  for big early : pass -- time:1064 nSec

naive  for big nearly: pass -- time:28980196 nSec
naive2 for big nearly: pass -- time:27551947 nSec
skip   for big nearly: pass -- time:1034 nSec
2xskip for big nearly: pass -- time:719 nSec
nskip  for big nearly: pass -- time:1217 nSec

naive  for big last  : pass -- time:27341796 nSec
naive2 for big last  : pass -- time:27780499 nSec
skip   for big last  : pass -- time:28165415 nSec
2xskip for big last  : pass -- time:24593599 nSec
nskip  for big last  : pass -- time:22455148 nSec

naive  for big nlast : pass -- time:27195820 nSec
naive2 for big nlast : pass -- time:26584976 nSec
skip   for big nlast : pass -- time:8640245 nSec
2xskip for big nlast : pass -- time:7304119 nSec
nskip  for big nlast : pass -- time:6601818 nSec