## Saturday, August 6, 2016

### google interview: detect a cycle in a graph

// detect a cycle in a directed graph

#include <iostream>
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <memory>
#include <algorithm>
#include <functional>
#include <chrono>

struct Node
{
static int gID;
int id_;

std::vector<Node*> kids_;

Node() : id_(++gID) {}
void link(Node& kid) { kids_.push_back(&kid); }
};

int Node::gID = 0;

// *************************************************
// *************** PRE BOARD WORK  *****************
// *************************************************
//
//  Basic loop with Node "R" going into unknown "Graph"
//                  +---+    << check for self link node in path
//    ~~~~~~~~~     |   |
//   (         ) <--R<--+
//    ) Graph (     ^
//   (         ) -?-+
//    ~~~~~~~~~
//
// Recursive case for "A" a child Node of "R"
//                  +---+---+    << check for prior node in path
//    ~~~~~~~~~     |   v   |
//   (         ) <--A<--R<--+
//    ) Graph (     ^   ^
//   (         ) -?-+-?-+
//    ~~~~~~~~~
//
// Recursive case for "B" with child Node "A"
//                  +---+---+--+    << check for *ANY* prior node in path
//    ~~~~~~~~~     |   v   v  |
//   (         ) <--B<--A<--R<-+
//    ) Graph (     ^   ^   ^
//   (         ) -?-+-?-+-?-+
//    ~~~~~~~~~
//
// etc
//
// Therefore first algo is to DFS visit all nodes and check if current nodes
// kids loop back into the path of nodes before the current one.

// *************************************************
// ***************** BASIC RECURSE *****************
// *************************************************

bool hasCycle_recurseImpl(Node* v,
std::vector<Node*>& path)
{
// Mark the current node as part of pathway
path.push_back(v);

// then for all kids
for(Node* i : v->kids_)
{
// if the kid is path of the path you have a loop done
std::vector<Node*>::iterator pit =
std::find(path.begin(), path.end(), i);

if (pit != path.end())
return true;

// recurse into kid
if (hasCycle_recurseImpl(i, path) )
return true;
}

// remove the node from path
path.pop_back();
return false;
}

bool hasCycle_recurse(Node* root)
{
std::vector<Node*> path;

if (root == NULL) return false;

return hasCycle_recurseImpl(root, path);
}

// *************************************************
// ***************** UNORDERED PATH ****************
// *************************************************

// optimization of path - ordering is of the path is *not* important.  As a
// result we can choose a data structure with O(1) search, insert and delete ie
// a hash

bool hasCycle_unordPathImpl(Node* v,
std::unordered_set<Node*>& path)
{
// Mark the current node as part of pathway
path.insert(v);

// then for all kids
for(Node* i : v->kids_)
{
// if the kid is path of the path you have a loop done
if (path.find(i) != path.end())
return true;

// recurse into kid
if (hasCycle_unordPathImpl(i, path) )
return true;
}

// remove the node from path
path.erase(v);
return false;
}

bool hasCycle_unordPath(Node* root)
{
std::unordered_set<Node*> path;

if (root == NULL) return false;

return hasCycle_unordPathImpl(root, path);
}

// *************************************************
// ****************** NO REVISIT  ******************
// *************************************************

// optimization of visiting - never revisit a node.  The logic behind this is
// easy but it took me a bit to actually see it.
//
// If you visit a node then the DFS will terminate on 2 possible conditions.
// 1) you found a cycle,
// 2) you exhausted all possible nodes below this one and didn't end up in a
// cycle.
//
// Now here is what took me a bit to get..  If you ever enter a path that forms
// a cycle you have no possible way to exit unless you found end of the cycle
// that means that you can never see a visited node unless it is about to start
// a cycle or has been exhausted and will *never* form a cycle even if we add to
// the current path. IE if it could have formed a cycle with the current node
// then it should have traversed into the current node already before it was
// exhausted.

bool hasCycle_1visitImpl(Node* v,
std::unordered_set<Node*>& visited,
std::unordered_set<Node*>& path)
{
if(visited.find(v) == visited.end())
{
// Mark the current node as visited and part of pathway
visited.insert(v);
path.insert(v);

// then for all kids
for(Node* i : v->kids_)
{
// if the kid is path of the path you have a loop done
if (path.find(i) != path.end())
return true;

// recurse into kid
if (hasCycle_1visitImpl(i, visited, path) )
return true;
}

// remove the node from path
path.erase(v);
}
return false;
}

bool hasCycle_1visit(Node* root)
{
std::unordered_set<Node*> visited;
std::unordered_set<Node*> path;

if (root == NULL) return false;

return hasCycle_1visitImpl(root, visited, path);
}

// *************************************************
// *** CALL STACK, PATH and VISIT DUPLICATION  *****
// *************************************************

// call stack, path hash and visit hash optimization -
// the recursive call stack is basically the same as the path stack and the path
// stack is the same as the visit stack all 3 data structures can be made into
// one..  using an iterative approach that stores the parent node to remove call
// stack, and a stateful hash entry to double as visit and child idx marking which
// kid is currently in use when in the path

bool hasCycle_iterative(Node* root)
{
typedef std::pair<Node*,int>  ParentKidIdx;
std::unordered_map<Node*, ParentKidIdx> path;

if (root == NULL) return false;

// note the oddity with the new nodes current kids we start with the end kid
// this way it naturally ends in a negative idx when kids are exhusted
path[root] = ParentKidIdx(NULL, root->kids_.size());
Node* current  = root;

while (current != NULL)
{
std::unordered_map<Node*, ParentKidIdx>::iterator pit
= path.find(current);

if (pit == path.end())
std::cout << "IMPOSSIBLE! we have a node without a state\n";

// just for easy of undertanding std::pair's member names suck..
// and as u can see this is a bit confusing
Node*  parent = pit->second.first;
int&   kidIdx = pit->second.second;  // note its *writable*

// find the next kid that that current node should work on ..
Node* newKid = NULL;
while (kidIdx >= 0 and newKid == NULL)
{
// try to get the next kid
--kidIdx;
if (kidIdx >= 0)
{
// extract the kid
newKid = current->kids_[kidIdx];

// check if the kid has a state already
std::unordered_map<Node*, ParentKidIdx>::iterator kit
= path.find(newKid);

if (kit != path.end())
{
// child is has aleady been visited but whats its current
// state at??
if (kit->second.second >= 0)
// it is activite and in the current path..so this is a
// cycle
return true;

// it was visited and exhusted..  waste of time move to
// next one
newKid = NULL;
}
else
{
// kid has never been visied before make its init state
// entry and then visited it
path[newKid] = ParentKidIdx(current,
newKid->kids_.size());
}
}
}

if (newKid != NULL)
{
// we found a new kid to explore.. jump in to it
current = newKid;
}
else
{
// we have exhusted all the current ones kids..  move up to the
// parent
current = parent;
}
}
return false;
}

// *************************************************
// ******************* TEST ************************
// *************************************************

void testCase(std::string tag,
std::function<bool(Node*)> hasCycle,
Node* root,
bool expected)
{
auto start = std::chrono::system_clock::now();
bool result = hasCycle(root);
auto end   = std::chrono::system_clock::now();

std::cout << tag << ": " << (expected == result ? "pass" : "FAIL")
<< " -- time:"
<< std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
<< " nSec" << std::endl;
}

void testCaseSet(std::string set,
Node* root,
bool expected)
{
std::cout << "\n *** " << set << " ***\n";
testCase("hasCycle_recurse", hasCycle_recurse,       root, expected);
testCase("hasCycle_unord  ", hasCycle_unordPath,     root, expected);
testCase("hasCycle_1visit ", hasCycle_1visit,        root, expected);
testCase("hasCycle_iter   ", hasCycle_iterative,     root, expected);
}

void test()
{
{
testCaseSet("null",
NULL, false);
}

{
Node a0;
testCaseSet("single node",
&a0, false);
}

{
Node a0;
testCaseSet("single node cycle",
&a0, true);
}

{
Node a0;
Node a1;
testCaseSet("two node",
&a0, false);
}

{
Node a0;
Node a1;

&a0, false);
}

{
Node a0;
Node a1;

testCaseSet("two node cycle",
&a0, true);
}

{
// visit breaker
Node a0;
Node a1;
Node a2;

testCaseSet("triple node double end link - reversed",
&a0, false);
}

{
// visit breaker
Node a0;
Node a1;
Node a2;

&a0, false);
}

{
Node a0;
Node a1;
Node a2;
Node a3;

testCaseSet("some random tree",
&a0, true);
}

{
// graph speed breaker wide first node with repeated long tail..
Node chest;
std::vector<Node> shoulders(100);
for (int i = 0; i < shoulders.size(); ++i)
{
}

std::vector<Node> tail(100);
for (int i = 1; i < tail.size(); ++i)
{
}

Node looper;

}
std::cout << std::endl;
}

int main()
{
test();
}


*** null ***
hasCycle_recurse: pass -- time:2574 nSec
hasCycle_unord  : pass -- time:13314 nSec
hasCycle_1visit : pass -- time:4531 nSec
hasCycle_iter   : pass -- time:4097 nSec

*** single node ***
hasCycle_recurse: pass -- time:4975 nSec
hasCycle_unord  : pass -- time:12330 nSec
hasCycle_1visit : pass -- time:9873 nSec
hasCycle_iter   : pass -- time:9279 nSec

*** single node cycle ***
hasCycle_recurse: pass -- time:4614 nSec
hasCycle_unord  : pass -- time:8321 nSec
hasCycle_1visit : pass -- time:9863 nSec
hasCycle_iter   : pass -- time:8335 nSec

*** two node ***
hasCycle_recurse: pass -- time:19714 nSec
hasCycle_unord  : pass -- time:9582 nSec
hasCycle_1visit : pass -- time:13318 nSec
hasCycle_iter   : pass -- time:11426 nSec

*** two node - double link ***
hasCycle_recurse: pass -- time:5507 nSec
hasCycle_unord  : pass -- time:11714 nSec
hasCycle_1visit : pass -- time:13890 nSec
hasCycle_iter   : pass -- time:11678 nSec

*** two node cycle ***
hasCycle_recurse: pass -- time:5299 nSec
hasCycle_unord  : pass -- time:7683 nSec
hasCycle_1visit : pass -- time:12348 nSec
hasCycle_iter   : pass -- time:9779 nSec

*** triple node double end link - reversed ***
hasCycle_recurse: pass -- time:7694 nSec
hasCycle_unord  : pass -- time:13821 nSec
hasCycle_1visit : pass -- time:18194 nSec
hasCycle_iter   : pass -- time:12800 nSec

*** triple node double end link ***
hasCycle_recurse: pass -- time:7322 nSec
hasCycle_unord  : pass -- time:13999 nSec
hasCycle_1visit : pass -- time:16998 nSec
hasCycle_iter   : pass -- time:12793 nSec

*** some random tree ***
hasCycle_recurse: pass -- time:4840 nSec
hasCycle_unord  : pass -- time:7792 nSec
hasCycle_1visit : pass -- time:11853 nSec
hasCycle_iter   : pass -- time:15752 nSec

hasCycle_recurse: pass -- time:18295450 nSec
hasCycle_unord  : pass -- time:14848133 nSec
hasCycle_1visit : pass -- time:615861 nSec
hasCycle_iter   : pass -- time:512595 nSec

*** head shoulder tail - looped ***
hasCycle_recurse: pass -- time:131233 nSec
hasCycle_unord  : pass -- time:136192 nSec
hasCycle_1visit : pass -- time:242850 nSec
hasCycle_iter   : pass -- time:176559 nSec