// http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer // You are given a list of numbers. When you reach the end of the list you // will come back to the beginning of the list (a circular list). Write the // most efficient algorithm to find the minimum # in this list. Find any given // # in the list. The numbers in the list are always increasing but you // don’t know where the circular list begins, ie: 38, 40, 55, 89, 6, 13, 20, // 23, 36. // First of all note very carefully this is for a *unidirectional circular linked // list* if we alter the design of the data structure there are much better ways // to do this.. but i believe that is out of bounds as a potential answer. // hmm.. essentially the find min number algorithm is a component of the find // any number algorithm.. Since the question isn't written as find the min, // then fix the list start and then find any # ill assume that it was meant as // a helper step to get to the find algorithm.. so in the interest of getting // it done lets just cut to the find any # algorithm and code a few evolutions // showing efficiency improvements.. #include <iostream> #include <vector> #include <chrono> #include <functional> // **************************************** // *************** CYCLIC LIST ************ // **************************************** class List { std::vector<int> data_; int offset_; public: class Iterator { List* list_; int index_; public: Iterator() : list_(NULL), index_(-1) {} Iterator(List* list) : list_(list), index_(list->offset_) {} // WARNING DO NOT USE in find.. that was not spec'ed // this is for testing only Iterator(List* list, int offset) : list_(list), index_(offset) {} Iterator& operator++() { index_ = (index_ + 1) % list_->data_.size(); return *this; } Iterator operator+(int delta) { int offset = (index_ + delta) % list_->data_.size(); while (offset < 0) offset += list_->data_.size(); return Iterator(list_, offset); } // WARNING DO NOT UE in find.. that was not spec'ed // this is for testing only Iterator& operator--() { index_ = (list_->data_.size() + index_ - 1) % list_->data_.size(); return *this; } int operator*() { return list_->data_[index_]; } bool operator==(const Iterator& other) const { return other.list_ == list_ and other.index_ == index_; } bool operator!=(const Iterator& other) const { return not (other == *this); } bool valid() const { return list_ != NULL; } }; List(std::initializer_list<int> data, int offset = 0) : data_(data), offset_(offset) {} List(int size) : data_(size), offset_(rand() % size) { // random fill int value = 0; for (int i = 0; i < size; ++i) { value += 2 + rand() % 5; data_[i] = value; } } bool empty() const { return data_.empty(); } Iterator begin() { return Iterator(this); } Iterator random() { return Iterator(this, rand() % data_.size()); } }; // **************************************** // *************** FIND NAIVE ************* // **************************************** List::Iterator find_naive(List& data, int target) { if (data.empty()) return List::Iterator(); List::Iterator start = data.begin(); List::Iterator i = start; do { if (*i == target) return i; ++i; } while (i != start); return List::Iterator(); } // **************************************** // ************* FIND NAIVE V2 ************ // **************************************** // Attempt to simply the code a bit so that we handle the cyclic iteration and // partial sorted issue cleaner for latter scaling List::Iterator find_naive2(List& data, int target) { if (data.empty()) return List::Iterator(); //termination is an issue because start *is* the end .. // simplify this by checking the start.. List::Iterator end = data.begin(); if (target == *end) return end; // then start at the first item after and now "start" is really the end List::Iterator i = end + 1; while (i != end and *i != target) ++i; if (*i == target) return i; return List::Iterator(); } // Of course you could try a divide and conquer idea.. but your dealing with a // list that you want to divide in half. Dividing it it half means you need to // step the iterator the mid point.. So assuming you know the size of the list // the total search on a fully sorted list will *always* cost u n/2 + n/4 + n/8 // ... moves. Which totals to n-1 (ie if n=8 is 4+2+1=7).. So the performance // is actually worst on average for the naive version ... of course this might // not be true because the cost of iterators might be very low compared to the // compare.. but i dont think it is... so im skipping it.. // **************************************** // ******** SKIP TO SORTED SECTION ******** // **************************************** // What we can really do is try to take advantage of the sorted order of data // and terminate early.. In order to do that what you have to do is skip to // the correct sorted part for your target and then search... (hence the find // min number stuff before) // Keep in mind this gets much less robust to problems with the data.. ie // if the list isnt sort (and parted) before hand.. List::Iterator find_skip_start(List& data, int target) { // handle the empty dataset case.. if (data.empty()) return List::Iterator(); //termination is an issue because start *is* the end .. // simplify this by checking the start.. List::Iterator end = data.begin(); if (target == *end) return end; // then start at the first item after and now "start" is really the end List::Iterator i = end + 1; // if the target is in the second part if (target < *end) { // skip to the second half int split_edge = *end; while (*i >= split_edge and i != end) ++i; } // then search for the target while (*i < target and i != end) ++i; if (*i == target) return i; return List::Iterator(); } // **************************************** // ******* DOUBLE SKIP IN SECTION ********* // **************************************** // Now because its sorted you dont have to check items every time you move // forward. You can step and then roll back if you went to far.. this makes // some big assumptions about the cost of iterator movement vs integer // compares.. my "List" class isnt great but it works out.. and in // theory should cut out every second compare.. List::Iterator find_double_skip(List& data, int target) { // handle the empty dataset case.. if (data.empty()) return List::Iterator(); //termination is an issue because start *is* the end .. // simplify this by checking the start.. List::Iterator end = data.begin(); if (target == *end) return end; // then start at the first item after and now "start" is really the end List::Iterator i = end + 1; if (target < *end) { int split_edge = *end; // double skip to second half List::Iterator iprev; while (*i >= split_edge and i != end) { ++i; iprev = i; if (i != end) ++i; } // check if we need to back up the double step. if (iprev.valid() and *iprev < split_edge) i = iprev; } { // double skip till target found or exceeded List::Iterator iprev; while (*i < target and i != end) { ++i; iprev = i; if (i != end) ++i; } // check if found the target at either of the pointers.. if (iprev.valid() and *iprev == target) return iprev; if (*i == target) return i; } return List::Iterator(); } // **************************************** // ********** N SKIP IN SECTION *********** // **************************************** // Optimization.. of course if u can do better with steps of 2 then u can // generalize it to any number of steps.. for example using steps of 20.. now // we are really playing balance games.. trading the speeds of various // comparisons, assignments etc.. Then u have to tune the size of skip for // your machine(s) List::Iterator find_n_skip(List& data, int target) { const int skip = 20; // handle the empty dataset case.. if (data.empty()) return List::Iterator(); //termination is an issue because start *is* the end .. // simplify this by checking the start.. List::Iterator end = data.begin(); if (target == *end) return end; // then start at the first item after and now "start" is really the end List::Iterator i = end + 1; if (target < *end) { List::Iterator iprev = i; int split_edge = *end; // skip to second half while (*i >= split_edge and i != end) { ++i; iprev = i; int steps = skip; while (--steps > 0 and i != end) ++i; } // when found return to the last known ok point and search forward 1 by 1 i = iprev; while (*i >= split_edge and i != end) ++i; } { List::Iterator iprev = i; while (*i < target and i != end) { ++i; iprev = i; int steps = skip; while (--steps > 0 and i != end) ++i; } // when found return to the last known ok point and search forward 1 by 1 i = iprev; while (*i < target and i != end) ++i; if (*i == target) return i; } return List::Iterator(); } void test(std::string dataTag, std::string algoTag, std::function<List::Iterator (List& , int)> func, List& data, int target, bool hasTarget) { auto start = std::chrono::system_clock::now(); List::Iterator result = func(data, target); auto end = std::chrono::system_clock::now(); std::cout << algoTag << " for " << dataTag << ": "; if (not result.valid()) std::cout << (hasTarget ? "FAIL" : "pass"); else std::cout << ((hasTarget and (target == *result)) ? "pass" : "FAIL"); std::cout << " -- time:" << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count() << " nSec" << std::endl; } void testSet(std::string tag, List& data, int target, bool hasTarget) { test(tag, "naive ", find_naive, data, target, hasTarget); test(tag, "naive2", find_naive2, data, target, hasTarget); test(tag, "skip ", find_skip_start, data, target, hasTarget); test(tag, "2xskip", find_double_skip, data, target, hasTarget); test(tag, "nskip ", find_n_skip, data, target, hasTarget); std::cout << "\n"; } int main() { srand(std::chrono::system_clock::now().time_since_epoch().count()); { List list({10,11,12,1,2,3,4,5,6,7,8}); testSet("small ", list, 4, true); } { List list(1000000); testSet("big rand ", list, *list.random(), true); testSet("big nrand ", list, *list.random()-1, false); // note that random always is +2 ~ +7 so -1 means i will not be in the data.. testSet("big early ", list, *(list.begin() + 10), true); // i hope.. its random so it might not be.. testSet("big nearly", list, *(list.begin() + 10)-1, false); // check early exit in first half on fail testSet("big last ", list, *(--list.begin()), true); testSet("big nlast ", list, -1, false); } }
naive for small : pass -- time:3207 nSec naive2 for small : pass -- time:2175 nSec skip for small : pass -- time:1996 nSec 2xskip for small : pass -- time:2151 nSec nskip for small : pass -- time:4232 nSec naive for big rand : pass -- time:14229998 nSec naive2 for big rand : pass -- time:13228995 nSec skip for big rand : pass -- time:12786059 nSec 2xskip for big rand : pass -- time:10874261 nSec nskip for big rand : pass -- time:9004453 nSec naive for big nrand : pass -- time:43750054 nSec naive2 for big nrand : pass -- time:37382249 nSec skip for big nrand : pass -- time:17939681 nSec 2xskip for big nrand : pass -- time:15509062 nSec nskip for big nrand : pass -- time:12927864 nSec naive for big early : pass -- time:738 nSec naive2 for big early : pass -- time:611 nSec skip for big early : pass -- time:632 nSec 2xskip for big early : pass -- time:708 nSec nskip for big early : pass -- time:1064 nSec naive for big nearly: pass -- time:28980196 nSec naive2 for big nearly: pass -- time:27551947 nSec skip for big nearly: pass -- time:1034 nSec 2xskip for big nearly: pass -- time:719 nSec nskip for big nearly: pass -- time:1217 nSec naive for big last : pass -- time:27341796 nSec naive2 for big last : pass -- time:27780499 nSec skip for big last : pass -- time:28165415 nSec 2xskip for big last : pass -- time:24593599 nSec nskip for big last : pass -- time:22455148 nSec naive for big nlast : pass -- time:27195820 nSec naive2 for big nlast : pass -- time:26584976 nSec skip for big nlast : pass -- time:8640245 nSec 2xskip for big nlast : pass -- time:7304119 nSec nskip for big nlast : pass -- time:6601818 nSec
No comments:
Post a Comment