Saturday, August 6, 2016

google interview: detect a cycle in a graph

// detect a cycle in a directed graph

#include <iostream>
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <memory>
#include <algorithm>
#include <functional>
#include <chrono>

struct Node
{
    static int gID;
    int id_;

    std::vector<Node*> kids_;

    Node() : id_(++gID) {}
    void link(Node& kid) { kids_.push_back(&kid); }
};

int Node::gID = 0;

// *************************************************
// *************** PRE BOARD WORK  *****************
// *************************************************
//
//  Basic loop with Node "R" going into unknown "Graph"
//                  +---+    << check for self link node in path
//    ~~~~~~~~~     |   |
//   (         ) <--R<--+
//    ) Graph (     ^
//   (         ) -?-+
//    ~~~~~~~~~
//
// Recursive case for "A" a child Node of "R"
//                  +---+---+    << check for prior node in path
//    ~~~~~~~~~     |   v   |
//   (         ) <--A<--R<--+
//    ) Graph (     ^   ^
//   (         ) -?-+-?-+
//    ~~~~~~~~~
//
// Recursive case for "B" with child Node "A"
//                  +---+---+--+    << check for *ANY* prior node in path
//    ~~~~~~~~~     |   v   v  |
//   (         ) <--B<--A<--R<-+
//    ) Graph (     ^   ^   ^
//   (         ) -?-+-?-+-?-+
//    ~~~~~~~~~
//
// etc
//
// Therefore first algo is to DFS visit all nodes and check if current nodes
// kids loop back into the path of nodes before the current one.

// *************************************************
// ***************** BASIC RECURSE *****************
// *************************************************

bool hasCycle_recurseImpl(Node* v,
                          std::vector<Node*>& path)
{
    // Mark the current node as part of pathway
    path.push_back(v);

    // then for all kids
    for(Node* i : v->kids_)
    {
        // if the kid is path of the path you have a loop done
        std::vector<Node*>::iterator pit =
            std::find(path.begin(), path.end(), i);

        if (pit != path.end())
            return true;

        // recurse into kid
        if (hasCycle_recurseImpl(i, path) )
            return true;
    }

    // remove the node from path
    path.pop_back();
    return false;
}

bool hasCycle_recurse(Node* root)
{
    std::vector<Node*> path;

    if (root == NULL) return false;

    return hasCycle_recurseImpl(root, path);
}


// *************************************************
// ***************** UNORDERED PATH ****************
// *************************************************

// optimization of path - ordering is of the path is *not* important.  As a
// result we can choose a data structure with O(1) search, insert and delete ie
// a hash

bool hasCycle_unordPathImpl(Node* v,
                            std::unordered_set<Node*>& path)
{
    // Mark the current node as part of pathway
    path.insert(v);

    // then for all kids
    for(Node* i : v->kids_)
    {
        // if the kid is path of the path you have a loop done
        if (path.find(i) != path.end())
            return true;

        // recurse into kid
        if (hasCycle_unordPathImpl(i, path) )
            return true;
    }

    // remove the node from path
    path.erase(v);
    return false;
}

bool hasCycle_unordPath(Node* root)
{
    std::unordered_set<Node*> path;

    if (root == NULL) return false;

    return hasCycle_unordPathImpl(root, path);
}

// *************************************************
// ****************** NO REVISIT  ******************
// *************************************************

// optimization of visiting - never revisit a node.  The logic behind this is
// easy but it took me a bit to actually see it.
//
// If you visit a node then the DFS will terminate on 2 possible conditions.
// 1) you found a cycle,
// 2) you exhausted all possible nodes below this one and didn't end up in a
// cycle.
//
// Now here is what took me a bit to get..  If you ever enter a path that forms
// a cycle you have no possible way to exit unless you found end of the cycle
// that means that you can never see a visited node unless it is about to start 
// a cycle or has been exhausted and will *never* form a cycle even if we add to
// the current path. IE if it could have formed a cycle with the current node 
// then it should have traversed into the current node already before it was
// exhausted.

bool hasCycle_1visitImpl(Node* v,
                         std::unordered_set<Node*>& visited,
                         std::unordered_set<Node*>& path)
{
    if(visited.find(v) == visited.end())
    {
        // Mark the current node as visited and part of pathway
        visited.insert(v);
        path.insert(v);

        // then for all kids
        for(Node* i : v->kids_)
        {
            // if the kid is path of the path you have a loop done
            if (path.find(i) != path.end())
                return true;

            // recurse into kid
            if (hasCycle_1visitImpl(i, visited, path) )
                return true;
        }

        // remove the node from path
        path.erase(v);
    }
    return false;
}

bool hasCycle_1visit(Node* root)
{
    std::unordered_set<Node*> visited;
    std::unordered_set<Node*> path;

    if (root == NULL) return false;

    return hasCycle_1visitImpl(root, visited, path);
}

// *************************************************
// *** CALL STACK, PATH and VISIT DUPLICATION  *****
// *************************************************

// call stack, path hash and visit hash optimization -
// the recursive call stack is basically the same as the path stack and the path
// stack is the same as the visit stack all 3 data structures can be made into
// one..  using an iterative approach that stores the parent node to remove call 
// stack, and a stateful hash entry to double as visit and child idx marking which 
// kid is currently in use when in the path

bool hasCycle_iterative(Node* root)
{
    typedef std::pair<Node*,int>  ParentKidIdx;
    std::unordered_map<Node*, ParentKidIdx> path;

    if (root == NULL) return false;

    // note the oddity with the new nodes current kids we start with the end kid
    // this way it naturally ends in a negative idx when kids are exhusted
    path[root] = ParentKidIdx(NULL, root->kids_.size());
    Node* current  = root;

    while (current != NULL)
    {
        std::unordered_map<Node*, ParentKidIdx>::iterator pit
            = path.find(current);

        if (pit == path.end())
            std::cout << "IMPOSSIBLE! we have a node without a state\n";

        // just for easy of undertanding std::pair's member names suck..
        // and as u can see this is a bit confusing
        Node*  parent = pit->second.first;
        int&   kidIdx = pit->second.second;  // note its *writable*

        // find the next kid that that current node should work on ..
        Node* newKid = NULL;
        while (kidIdx >= 0 and newKid == NULL)
        {
            // try to get the next kid
            --kidIdx;
            if (kidIdx >= 0)
            {
                // extract the kid
                newKid = current->kids_[kidIdx];

                // check if the kid has a state already
                std::unordered_map<Node*, ParentKidIdx>::iterator kit
                    = path.find(newKid);


                if (kit != path.end())
                {
                    // child is has aleady been visited but whats its current
                    // state at??
                    if (kit->second.second >= 0)
                        // it is activite and in the current path..so this is a
                        // cycle
                        return true;

                    // it was visited and exhusted..  waste of time move to
                    // next one
                    newKid = NULL;
                }
                else
                {
                    // kid has never been visied before make its init state
                    // entry and then visited it
                    path[newKid] = ParentKidIdx(current,
                                                newKid->kids_.size());
                }
            }
        }

        if (newKid != NULL)
        {
            // we found a new kid to explore.. jump in to it
            current = newKid;
        }
        else
        {
            // we have exhusted all the current ones kids..  move up to the
            // parent
            current = parent;
        }
    }
    return false;
}


// *************************************************
// ******************* TEST ************************
// *************************************************

void testCase(std::string tag,
              std::function<bool(Node*)> hasCycle,
              Node* root,
              bool expected)
{
    auto start = std::chrono::system_clock::now();
    bool result = hasCycle(root);
    auto end   = std::chrono::system_clock::now();

    std::cout << tag << ": " << (expected == result ? "pass" : "FAIL")
              << " -- time:"
              << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
              << " nSec" << std::endl;
}

void testCaseSet(std::string set,
                 Node* root,
                 bool expected)
{
    std::cout << "\n *** " << set << " ***\n";
    testCase("hasCycle_recurse", hasCycle_recurse,       root, expected);
    testCase("hasCycle_unord  ", hasCycle_unordPath,     root, expected);
    testCase("hasCycle_1visit ", hasCycle_1visit,        root, expected);
    testCase("hasCycle_iter   ", hasCycle_iterative,     root, expected);
}

void test()
{
    {
        testCaseSet("null",
                    NULL, false);
    }

    {
        Node a0;
        testCaseSet("single node",
                    &a0, false);
    }

    {
        Node a0;
        a0.link(a0); // cycle
        testCaseSet("single node cycle",
                    &a0, true);
    }

    {
        Node a0;
        Node a1;
        a0.link(a1);
        testCaseSet("two node",
                    &a0, false);
    }

    {
        Node a0;
        Node a1;

        a0.link(a1);
        a0.link(a1);
        testCaseSet("two node - double link",
                    &a0, false);
    }

    {
        Node a0;
        Node a1;

        a0.link(a1);
        a1.link(a0); // cycle
        testCaseSet("two node cycle",
                    &a0, true);
    }

    {
        // visit breaker
        Node a0;
        Node a1;
        Node a2;

        a0.link(a2);
        a0.link(a1);
        a1.link(a2);

        testCaseSet("triple node double end link - reversed",
                    &a0, false);
    }

    {
        // visit breaker
        Node a0;
        Node a1;
        Node a2;

        a0.link(a1);
        a0.link(a2);
        a1.link(a2);

        testCaseSet("triple node double end link",
                    &a0, false);
    }

    {
        Node a0;
        Node a1;
        Node a2;
        Node a3;

        a0.link(a2);
        a0.link(a1);
        a1.link(a2);
        a2.link(a0); // cycle..
        a2.link(a3);
        a3.link(a3); // cycle

        testCaseSet("some random tree",
                    &a0, true);
    }

    {
        // graph speed breaker wide first node with repeated long tail..
        Node head;
        Node chest;
        std::vector<Node> shoulders(100);
        for (int i = 0; i < shoulders.size(); ++i)
        {
            head.link(shoulders[i]);
            shoulders[i].link(chest);
        }

        std::vector<Node> tail(100);
        chest.link(tail[0]);
        for (int i = 1; i < tail.size(); ++i)
        {
            tail[i-1].link(tail[i]);
        }

        testCaseSet("head shoulder tail",
                    &head, false);

        Node looper;
        tail[tail.size()-1].link(looper);
        looper.link(head);

        testCaseSet("head shoulder tail - looped",
                    &head, true);
    }
    std::cout << std::endl;
}

int main()
{
    test();
}

*** null ***
hasCycle_recurse: pass -- time:2574 nSec
hasCycle_unord  : pass -- time:13314 nSec
hasCycle_1visit : pass -- time:4531 nSec
hasCycle_iter   : pass -- time:4097 nSec

 *** single node ***
hasCycle_recurse: pass -- time:4975 nSec
hasCycle_unord  : pass -- time:12330 nSec
hasCycle_1visit : pass -- time:9873 nSec
hasCycle_iter   : pass -- time:9279 nSec

 *** single node cycle ***
hasCycle_recurse: pass -- time:4614 nSec
hasCycle_unord  : pass -- time:8321 nSec
hasCycle_1visit : pass -- time:9863 nSec
hasCycle_iter   : pass -- time:8335 nSec

 *** two node ***
hasCycle_recurse: pass -- time:19714 nSec
hasCycle_unord  : pass -- time:9582 nSec
hasCycle_1visit : pass -- time:13318 nSec
hasCycle_iter   : pass -- time:11426 nSec

 *** two node - double link ***
hasCycle_recurse: pass -- time:5507 nSec
hasCycle_unord  : pass -- time:11714 nSec
hasCycle_1visit : pass -- time:13890 nSec
hasCycle_iter   : pass -- time:11678 nSec

 *** two node cycle ***
hasCycle_recurse: pass -- time:5299 nSec
hasCycle_unord  : pass -- time:7683 nSec
hasCycle_1visit : pass -- time:12348 nSec
hasCycle_iter   : pass -- time:9779 nSec

 *** triple node double end link - reversed ***
hasCycle_recurse: pass -- time:7694 nSec
hasCycle_unord  : pass -- time:13821 nSec
hasCycle_1visit : pass -- time:18194 nSec
hasCycle_iter   : pass -- time:12800 nSec

 *** triple node double end link ***
hasCycle_recurse: pass -- time:7322 nSec
hasCycle_unord  : pass -- time:13999 nSec
hasCycle_1visit : pass -- time:16998 nSec
hasCycle_iter   : pass -- time:12793 nSec

 *** some random tree ***
hasCycle_recurse: pass -- time:4840 nSec
hasCycle_unord  : pass -- time:7792 nSec
hasCycle_1visit : pass -- time:11853 nSec
hasCycle_iter   : pass -- time:15752 nSec

 *** head shoulder tail ***
hasCycle_recurse: pass -- time:18295450 nSec
hasCycle_unord  : pass -- time:14848133 nSec
hasCycle_1visit : pass -- time:615861 nSec
hasCycle_iter   : pass -- time:512595 nSec

 *** head shoulder tail - looped ***
hasCycle_recurse: pass -- time:131233 nSec
hasCycle_unord  : pass -- time:136192 nSec
hasCycle_1visit : pass -- time:242850 nSec
hasCycle_iter   : pass -- time:176559 nSec

No comments:

Post a Comment