// detect a cycle in a directed graph #include <iostream> #include <vector> #include <unordered_set> #include <unordered_map> #include <memory> #include <algorithm> #include <functional> #include <chrono> struct Node { static int gID; int id_; std::vector<Node*> kids_; Node() : id_(++gID) {} void link(Node& kid) { kids_.push_back(&kid); } }; int Node::gID = 0; // ************************************************* // *************** PRE BOARD WORK ***************** // ************************************************* // // Basic loop with Node "R" going into unknown "Graph" // +---+ << check for self link node in path // ~~~~~~~~~ | | // ( ) <--R<--+ // ) Graph ( ^ // ( ) -?-+ // ~~~~~~~~~ // // Recursive case for "A" a child Node of "R" // +---+---+ << check for prior node in path // ~~~~~~~~~ | v | // ( ) <--A<--R<--+ // ) Graph ( ^ ^ // ( ) -?-+-?-+ // ~~~~~~~~~ // // Recursive case for "B" with child Node "A" // +---+---+--+ << check for *ANY* prior node in path // ~~~~~~~~~ | v v | // ( ) <--B<--A<--R<-+ // ) Graph ( ^ ^ ^ // ( ) -?-+-?-+-?-+ // ~~~~~~~~~ // // etc // // Therefore first algo is to DFS visit all nodes and check if current nodes // kids loop back into the path of nodes before the current one. // ************************************************* // ***************** BASIC RECURSE ***************** // ************************************************* bool hasCycle_recurseImpl(Node* v, std::vector<Node*>& path) { // Mark the current node as part of pathway path.push_back(v); // then for all kids for(Node* i : v->kids_) { // if the kid is path of the path you have a loop done std::vector<Node*>::iterator pit = std::find(path.begin(), path.end(), i); if (pit != path.end()) return true; // recurse into kid if (hasCycle_recurseImpl(i, path) ) return true; } // remove the node from path path.pop_back(); return false; } bool hasCycle_recurse(Node* root) { std::vector<Node*> path; if (root == NULL) return false; return hasCycle_recurseImpl(root, path); } // ************************************************* // ***************** UNORDERED PATH **************** // ************************************************* // optimization of path - ordering is of the path is *not* important. As a // result we can choose a data structure with O(1) search, insert and delete ie // a hash bool hasCycle_unordPathImpl(Node* v, std::unordered_set<Node*>& path) { // Mark the current node as part of pathway path.insert(v); // then for all kids for(Node* i : v->kids_) { // if the kid is path of the path you have a loop done if (path.find(i) != path.end()) return true; // recurse into kid if (hasCycle_unordPathImpl(i, path) ) return true; } // remove the node from path path.erase(v); return false; } bool hasCycle_unordPath(Node* root) { std::unordered_set<Node*> path; if (root == NULL) return false; return hasCycle_unordPathImpl(root, path); } // ************************************************* // ****************** NO REVISIT ****************** // ************************************************* // optimization of visiting - never revisit a node. The logic behind this is // easy but it took me a bit to actually see it. // // If you visit a node then the DFS will terminate on 2 possible conditions. // 1) you found a cycle, // 2) you exhausted all possible nodes below this one and didn't end up in a // cycle. // // Now here is what took me a bit to get.. If you ever enter a path that forms // a cycle you have no possible way to exit unless you found end of the cycle // that means that you can never see a visited node unless it is about to start // a cycle or has been exhausted and will *never* form a cycle even if we add to // the current path. IE if it could have formed a cycle with the current node // then it should have traversed into the current node already before it was // exhausted. bool hasCycle_1visitImpl(Node* v, std::unordered_set<Node*>& visited, std::unordered_set<Node*>& path) { if(visited.find(v) == visited.end()) { // Mark the current node as visited and part of pathway visited.insert(v); path.insert(v); // then for all kids for(Node* i : v->kids_) { // if the kid is path of the path you have a loop done if (path.find(i) != path.end()) return true; // recurse into kid if (hasCycle_1visitImpl(i, visited, path) ) return true; } // remove the node from path path.erase(v); } return false; } bool hasCycle_1visit(Node* root) { std::unordered_set<Node*> visited; std::unordered_set<Node*> path; if (root == NULL) return false; return hasCycle_1visitImpl(root, visited, path); } // ************************************************* // *** CALL STACK, PATH and VISIT DUPLICATION ***** // ************************************************* // call stack, path hash and visit hash optimization - // the recursive call stack is basically the same as the path stack and the path // stack is the same as the visit stack all 3 data structures can be made into // one.. using an iterative approach that stores the parent node to remove call // stack, and a stateful hash entry to double as visit and child idx marking which // kid is currently in use when in the path bool hasCycle_iterative(Node* root) { typedef std::pair<Node*,int> ParentKidIdx; std::unordered_map<Node*, ParentKidIdx> path; if (root == NULL) return false; // note the oddity with the new nodes current kids we start with the end kid // this way it naturally ends in a negative idx when kids are exhusted path[root] = ParentKidIdx(NULL, root->kids_.size()); Node* current = root; while (current != NULL) { std::unordered_map<Node*, ParentKidIdx>::iterator pit = path.find(current); if (pit == path.end()) std::cout << "IMPOSSIBLE! we have a node without a state\n"; // just for easy of undertanding std::pair's member names suck.. // and as u can see this is a bit confusing Node* parent = pit->second.first; int& kidIdx = pit->second.second; // note its *writable* // find the next kid that that current node should work on .. Node* newKid = NULL; while (kidIdx >= 0 and newKid == NULL) { // try to get the next kid --kidIdx; if (kidIdx >= 0) { // extract the kid newKid = current->kids_[kidIdx]; // check if the kid has a state already std::unordered_map<Node*, ParentKidIdx>::iterator kit = path.find(newKid); if (kit != path.end()) { // child is has aleady been visited but whats its current // state at?? if (kit->second.second >= 0) // it is activite and in the current path..so this is a // cycle return true; // it was visited and exhusted.. waste of time move to // next one newKid = NULL; } else { // kid has never been visied before make its init state // entry and then visited it path[newKid] = ParentKidIdx(current, newKid->kids_.size()); } } } if (newKid != NULL) { // we found a new kid to explore.. jump in to it current = newKid; } else { // we have exhusted all the current ones kids.. move up to the // parent current = parent; } } return false; } // ************************************************* // ******************* TEST ************************ // ************************************************* void testCase(std::string tag, std::function<bool(Node*)> hasCycle, Node* root, bool expected) { auto start = std::chrono::system_clock::now(); bool result = hasCycle(root); auto end = std::chrono::system_clock::now(); std::cout << tag << ": " << (expected == result ? "pass" : "FAIL") << " -- time:" << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count() << " nSec" << std::endl; } void testCaseSet(std::string set, Node* root, bool expected) { std::cout << "\n *** " << set << " ***\n"; testCase("hasCycle_recurse", hasCycle_recurse, root, expected); testCase("hasCycle_unord ", hasCycle_unordPath, root, expected); testCase("hasCycle_1visit ", hasCycle_1visit, root, expected); testCase("hasCycle_iter ", hasCycle_iterative, root, expected); } void test() { { testCaseSet("null", NULL, false); } { Node a0; testCaseSet("single node", &a0, false); } { Node a0; a0.link(a0); // cycle testCaseSet("single node cycle", &a0, true); } { Node a0; Node a1; a0.link(a1); testCaseSet("two node", &a0, false); } { Node a0; Node a1; a0.link(a1); a0.link(a1); testCaseSet("two node - double link", &a0, false); } { Node a0; Node a1; a0.link(a1); a1.link(a0); // cycle testCaseSet("two node cycle", &a0, true); } { // visit breaker Node a0; Node a1; Node a2; a0.link(a2); a0.link(a1); a1.link(a2); testCaseSet("triple node double end link - reversed", &a0, false); } { // visit breaker Node a0; Node a1; Node a2; a0.link(a1); a0.link(a2); a1.link(a2); testCaseSet("triple node double end link", &a0, false); } { Node a0; Node a1; Node a2; Node a3; a0.link(a2); a0.link(a1); a1.link(a2); a2.link(a0); // cycle.. a2.link(a3); a3.link(a3); // cycle testCaseSet("some random tree", &a0, true); } { // graph speed breaker wide first node with repeated long tail.. Node head; Node chest; std::vector<Node> shoulders(100); for (int i = 0; i < shoulders.size(); ++i) { head.link(shoulders[i]); shoulders[i].link(chest); } std::vector<Node> tail(100); chest.link(tail[0]); for (int i = 1; i < tail.size(); ++i) { tail[i-1].link(tail[i]); } testCaseSet("head shoulder tail", &head, false); Node looper; tail[tail.size()-1].link(looper); looper.link(head); testCaseSet("head shoulder tail - looped", &head, true); } std::cout << std::endl; } int main() { test(); }
*** null *** hasCycle_recurse: pass -- time:2574 nSec hasCycle_unord : pass -- time:13314 nSec hasCycle_1visit : pass -- time:4531 nSec hasCycle_iter : pass -- time:4097 nSec *** single node *** hasCycle_recurse: pass -- time:4975 nSec hasCycle_unord : pass -- time:12330 nSec hasCycle_1visit : pass -- time:9873 nSec hasCycle_iter : pass -- time:9279 nSec *** single node cycle *** hasCycle_recurse: pass -- time:4614 nSec hasCycle_unord : pass -- time:8321 nSec hasCycle_1visit : pass -- time:9863 nSec hasCycle_iter : pass -- time:8335 nSec *** two node *** hasCycle_recurse: pass -- time:19714 nSec hasCycle_unord : pass -- time:9582 nSec hasCycle_1visit : pass -- time:13318 nSec hasCycle_iter : pass -- time:11426 nSec *** two node - double link *** hasCycle_recurse: pass -- time:5507 nSec hasCycle_unord : pass -- time:11714 nSec hasCycle_1visit : pass -- time:13890 nSec hasCycle_iter : pass -- time:11678 nSec *** two node cycle *** hasCycle_recurse: pass -- time:5299 nSec hasCycle_unord : pass -- time:7683 nSec hasCycle_1visit : pass -- time:12348 nSec hasCycle_iter : pass -- time:9779 nSec *** triple node double end link - reversed *** hasCycle_recurse: pass -- time:7694 nSec hasCycle_unord : pass -- time:13821 nSec hasCycle_1visit : pass -- time:18194 nSec hasCycle_iter : pass -- time:12800 nSec *** triple node double end link *** hasCycle_recurse: pass -- time:7322 nSec hasCycle_unord : pass -- time:13999 nSec hasCycle_1visit : pass -- time:16998 nSec hasCycle_iter : pass -- time:12793 nSec *** some random tree *** hasCycle_recurse: pass -- time:4840 nSec hasCycle_unord : pass -- time:7792 nSec hasCycle_1visit : pass -- time:11853 nSec hasCycle_iter : pass -- time:15752 nSec *** head shoulder tail *** hasCycle_recurse: pass -- time:18295450 nSec hasCycle_unord : pass -- time:14848133 nSec hasCycle_1visit : pass -- time:615861 nSec hasCycle_iter : pass -- time:512595 nSec *** head shoulder tail - looped *** hasCycle_recurse: pass -- time:131233 nSec hasCycle_unord : pass -- time:136192 nSec hasCycle_1visit : pass -- time:242850 nSec hasCycle_iter : pass -- time:176559 nSec
No comments:
Post a Comment