Wednesday, August 3, 2016

Google interview: Implement an AVL tree ...

Seems to me that google is most likely to ask about Tree inserts over erase. Also they might ask for traversal, infix printing etc etc... basically the cheapest way to do this is just code the whole AVL tree eco system period...

AVL trees look complex but they basically are a BST tree with an additional pair of rebalancing rotations that you apply to them after an insert or erase.. this makes them somewhat close to the theoretical log(n) depth of a well balanced BST tree...

There are 2 rotate operations that are depicted as follows. they are reversed depending of the side that are viewing them from (some people call them 4 with i think just over complicates them), personally i know them as inside and outside rotates, you can tell by looking at the position of the nodes grand child.. if its under the grandparent its inside, otherwise its outside.

a) grandchild-outside-child (left side)
T1, T2, T3 and T4 are subtrees.
z                            y
/ \                         /   \
y   T4  Right Rotate (z)    x      z
/ \      --------------->  /  \    /  \
x   T3                     T1  T2  T3  T4
/ \
T1   T2

b) grandchild-inside-child (left-side)
z                            z                            x
/ \                         /   \                        /   \
y   T4  Left Rotate (y)     x    T4   Right Rotate(z)   y      z
/ \      -------------->    /  \       -------------->  / \    / \
T1   x                       y    T3                     T1  T2 T3  T4
/ \                     / \
T2   T3                 T1   T2


The trick to knowing when to perform the rebalance ops is to check the height balance between the nodes left and right side of the current one. If the height balance is off by more than +/- 1 you need to rotate, the sign tells you the direction. Then to determine if its the inner or outer case you check the balance of the child node.. as you can see from the diagram above the outer case should already have an inbalance in the *same* direction(sign) as the parents inbalance if it does not then you have the inner case and need to adjust it to make the other case before doing the rotate to correct..

#include <stdint.h>
#include <vector>
#include <algorithm>
#include <iostream>
#include <functional>

template <typename T>
class AvlTree
{
public:
// late binding via constructor - dont pollute type
typedef std::function<bool(const T&, const T&)> Comparer;

private:
struct Node
{
uint32_t height_;
Node*    left_;
Node*    right_;
T        item_;

Node(const T& item) :
height_(1),
left_(NULL),
right_(NULL),
item_(item)
{}

~Node()
{
if (left_  != NULL) delete left_;
if (right_ != NULL) delete right_;
}

int height() const
{
int leftHeight  = (left_  != NULL) ? left_->height_  : 0;
int rightHeight = (right_ != NULL) ? right_->height_ : 0;

return std::max(leftHeight, rightHeight) + 1;
}

int balance() const
{
int leftHeight  = (left_  != NULL) ? left_->height_  : 0;
int rightHeight = (right_ != NULL) ? right_->height_ : 0;

return leftHeight - rightHeight;
}

};
public:
class Iterator
{
// infix DFS reshaped into stack for an iterator.. fun but crazy...
//
// Ok..  So i got creative here..  rather than use parent pointers in the
// tree I implemented the iterator as a DFS parent stack..  now you do
// *not* want to do this in production because its actually a lot more
// fragile than the parent pointer solution..
//
// consider the following trade offs
// - the find operation needs to create the stack which is costly and
//  iterator returned from the find will often *not* move making it
//  needless
// - the iterator is less robust to tree updates.  with a parent
//  pointer the iterator just points at one node so changing anything that
//  is not the current node is generally ok..  However with the stack
//  implementation any update that effects the parent chain breaks the
//  iterators stack and makes a large issue

// TODO can a bi-directional version be made??...

typedef std::vector<Node*> Stack;

Stack stack_;

void enstackLeft(Node* node);
void next();

public:
Iterator(Node* root);   // not publicily usable because Node is not public!
Iterator();
Iterator& operator++();
void      operator++(int);
const T&  operator*() const;
const T*  operator->() const;

T&  operator*() ;
T*  operator->();

int height() const; // yep mostly for testing

bool  operator==(const Iterator& other) const;
bool  operator!=(const Iterator& other) const;
};

class Handle
{
// a light weight *immovable* iterator .. so *not* an iterator
// TODO it *is* possible to lazy covert to a real iterator if ++'ed
//  but whatever the stacked based iterator is crazy
Node* node_;
public:
Handle() :
node_(NULL)
{}

Handle(Node* node) :
node_(node)
{}

const T& operator*()  const { return node_->item_; }
T& operator*()        { return node_->item_; }

const T* operator->() const { return &(node_->item_); }
T* operator->()       { return &(node_->item_); }

operator bool () const      { return node_ != NULL; }
};

private:
Node*   root_;
Comparer cmp_;

// TODO compilation firewalling issues...
//  ie can this be converted to a base Node without T and a factory to
//  create and handle Nodes with T.

void rotateLeft (Node*& node);
void rotateRight(Node*& node);
void rebalance  (Node*& node);

void insertImpl (const T& item,
Node*& node);

void replacementEraseRight(Node*& node,
Node*& other);

void eraseImpl(const T& item,
Node*& node);

public:
AvlTree() :
root_(NULL),
cmp_(std::less<T>())
{}

AvlTree(Comparer cmp) :
root_(NULL),
cmp_(cmp)
{}

~AvlTree()
{
if (root_ != NULL) delete root_;
}

void insert(const T& item);
void erase(const T& item);
Handle find(const T& item);
void print(std::ostream& os);

Iterator begin();
Iterator end();
};

// *************************************************
// ******************** ITERATOR *******************
// *************************************************

template <typename T>
AvlTree<T>::Iterator::Iterator() :
stack_()
{}

template <typename T>
AvlTree<T>::Iterator::Iterator(AvlTree<T>::Node* root) :
stack_()
{
//when we add new nodes we go down the left side and enstack then
enstackLeft(root);
}

template <typename T>
void AvlTree<T>::Iterator::enstackLeft(typename AvlTree<T>::Node* node)
{
while (node != NULL)
{
stack_.push_back(node);
node = node->left_;
}
}

template <typename T>
void AvlTree<T>::Iterator::next()
{
// node has just completed left side and self visit.. remove me from the stack
Node* current = stack_.back();
stack_.pop_back();

// and enstack add my right branch
enstackLeft(current->right_);
}

template <typename T>
const T& AvlTree<T>::Iterator::operator*() const
{
return stack_.back()->item_;
}

template <typename T>
T& AvlTree<T>::Iterator::operator*()
{
return stack_.back()->item_;
}

template <typename T>
const T* AvlTree<T>::Iterator::operator->() const
{
return &(stack_.back()->item_);
}

template <typename T>
T* AvlTree<T>::Iterator::operator->()
{
return &(stack_.back()->item_);
}

template <typename T>
int AvlTree<T>::Iterator::height() const
{
return stack_.back()->height_;
}

template <typename T>
typename AvlTree<T>::Iterator& AvlTree<T>::Iterator::operator++()
{
next();
return *this;
}

// NOTE void on x++ to termiate the post iterator junk
template <typename T>
void AvlTree<T>::Iterator::operator++(int)
{
// Node* prev = stack_.back();
next();
// return ...;
}

template <typename T>
bool AvlTree<T>::Iterator::operator==(const AvlTree<T>::Iterator& other) const
{
// TODO short cut the last item?? would be quicker
return other.stack_ == stack_;
}

template <typename T>
bool AvlTree<T>::Iterator::operator!=(const AvlTree<T>::Iterator& other) const
{
// TODO short cut the last item?? would be quicker
return not (other == *this);
}

template <typename T>
typename AvlTree<T>::Iterator AvlTree<T>::begin()
{
return Iterator(root_);
}

template <typename T>
typename AvlTree<T>::Iterator AvlTree<T>::end()
{
return Iterator();
}

// *************************************************
// ********************** FIND *********************
// *************************************************

// this is a bit of a problem using stack based iterators makes find
// more weighty using nodes exposes the internals
// lightwieght_iterator?? one that cant move but gives you access.. a "handle"
template <typename T>
typename AvlTree<T>::Handle AvlTree<T>::find(const T& item)
{
Node* node = root_;
while (node != NULL)
{
if      (cmp_(item, node->item_))   // less *than*
{
node = node->left_;
}
else if (cmp_(node->item_, item))   // greater *than*
{
node = node->right_;
}
else
{
return typename AvlTree<T>::Handle(node);
}
}
return typename AvlTree<T>::Handle();
}

// *************************************************
// ********************** PRINT ********************
// *************************************************

template <typename T>
void AvlTree<T>::print(std::ostream& os)
{
for (AvlTree<uint32_t>::Iterator it = begin();
it != end();
++it)
{
os << *it << "(" << it.height()  << ")" << " ";
}
}

template <typename T>
std::ostream& operator<<(std::ostream& os, AvlTree<T>& tree)
{
tree.print(os);
return os;
}

// *************************************************
// ******************** REBALANCE ******************
// *************************************************

template <typename T>
void AvlTree<T>::rotateLeft(typename AvlTree<T>::Node*& node)
{
// given the current node make its left the top
Node* right  = node->right_;
if (right == NULL) return;  // this will explode.. stop!

node->right_ = right->left_;
right->left_  = node;

node->height_  = node->height();
right->height_ = right->height();

// keep in mind that this is using Node*& so the following updates the
// nodes parent left/right or the root!
node = right;
}

template <typename T>
void AvlTree<T>::rotateRight(typename AvlTree<T>::Node*& node)
{
// given the current node make its left the top
Node* left  = node->left_;
if (left == NULL) return; // this will explode.. stop!

node->left_ = left->right_;
left->right_ = node;

node->height_ = node->height();
left->height_ = left->height();

// keep in mind that this is using Node*& so the following updates the
// nodes parent left/right or the root!
node = left;
}

// attempt to rebalance in a general way for both insert delete..
template <typename T>
void AvlTree<T>::rebalance(typename AvlTree<T>::Node*& node)
{
if (node == NULL) return;

// correct node height..
node->height_ = node->height();

// and compute AVL rotations
int balance = node->balance();

// done if balance is good...
if (balance > -2 and balance < 2)  return;

Node* child = NULL;
if (balance < -1)
{
// right side is heavy
child = node->right_;
}
else if (balance > 1)
{
// left side is heavy
child = node->left_;
}

int childBalance = child->balance();

// std::cout << "   ++ childBalance: " << childBalance << "\n";

// now rotate to fix inbalance
if (balance > 0)
{
// planning to rotate right..
if (childBalance <= 0)
//but if the inner child left heavy or balanced correct it
rotateLeft(node->left_);

// left side so rotate right
rotateRight(node);
}
else
{
// planning to rotate left..
if (childBalance >= 0)
//but if the inner child right heavy or balanced correct it
rotateRight(node->right_);

rotateLeft(node);
}
}

// *************************************************
// ********************* INSERT ********************
// *************************************************

template <typename T>
void AvlTree<T>::insertImpl(const T& item,
typename AvlTree<T>::Node*& node)
{
// BST tree insert
if (node == NULL)
{
node = new Node(item);
return;
}

if (cmp_(item, node->item_))
{
insertImpl(item, node->left_);
}
else
{
insertImpl(item, node->right_);
}

// now AVL tree rebalance..
rebalance(node);
}

template <typename T>
void AvlTree<T>::insert(const T& item)
{
insertImpl(item, root_);
}

// *************************************************
// ********************* ERASE *********************
// *************************************************

template <typename T>
void AvlTree<T>::replacementEraseRight(typename AvlTree<T>::Node*& node,
typename AvlTree<T>::Node*& other)
{
// we are no longer searching for the erase target..
// we are now searching for the next object after it
// the right side min

if (other->left_ != NULL)
{
// go right to the end of the left branching and do replacement erase
replacementEraseRight(node,other->left_);

// then rebalance the tree from here up
rebalance(other);
}
else
{
// swap replacement with target node so nothing odd happens
std::swap(node->item_, other->item_);

// now unlink and erase other from tree
Node* right = other->right_;
other->right_ = NULL;
delete other;

// relink others child in its place
other = right;
}
}

template <typename T>
void AvlTree<T>::eraseImpl(const T& item,
typename AvlTree<T>::Node*& node)
{
// BST tree erase
// didnt exist in the tree??
if (node == NULL) return;

//
if (cmp_(item, node->item_))          // a < b
{
eraseImpl(item, node->left_);
}
else if (cmp_(node->item_, item))     // b < a
{
eraseImpl(item, node->right_);
}
else                                  // b == a
{
// match!
if ((node->right_ == NULL) or (node->right_ == NULL))
{
Node* child = (node->right_ == NULL) ? node->left_ : node->right_;

// unlink node.. and kill it
node->right_ = NULL;
node->left_ = NULL;
delete node;

// replace with child
node = child;
}
else
{
// special case both right and left have stuff.. but we have 1 parent
// means we have to take a one of the closet (in value) nodes and replace this one
// we have 2 choices for the replacement the max of left or min of right...
replacementEraseRight(node, node->right_);

// TODO.. shouldn't this do a balance check and then go left or right based on the heavy branch??
}
}

// and then AVL tree rebalance..
rebalance(node);
}

template <typename T>
void AvlTree<T>::erase(const T& item)
{
eraseImpl(item, root_);
}

// *************************************************
// ********************* AVL MAP *******************
// *************************************************

// TODO cleanup AvlTree to AvlMap coverison class
template <typename Key,
typename Value>
class AvlMap
{
typedef std::pair<Key,Value> Pair;
typedef std::function<bool(const Pair&, const Pair&)> Comparer;
typedef AvlTree<Pair> Tree;

AvlTree<Pair> tree_;
public:
typedef typename Tree::Iterator Iterator;

AvlMap() :
tree_([] (const Pair& v1, const Pair& v2) { return v1.first < v2.first; })
{}

Value& operator[](const Key& key)
{
// hmm not such a great implementation...
Pair target(key,Value());
typename AvlTree<Pair>::Handle it = tree_.find(target);
if (it) return it->second;

tree_.insert(target);
return tree_.find(target)->second;
}

Iterator begin() { return tree_.begin(); }
Iterator end()   { return tree_.end();   }
};

// *************************************************
// *********************** TEST ********************
// *************************************************

void test()
{
// TODO honestly this inst real testing.. need to make it check the results..
{
AvlTree<uint32_t> tree;

tree.begin();
tree.end();

tree.find(1);
}

{
AvlTree<uint32_t> tree;

tree.insert(50); std::cout << tree << "\n";
tree.insert(25); std::cout << tree << "\n";
tree.insert(10); std::cout << tree << "\n";
tree.insert(40); std::cout << tree << "\n";
tree.insert(30); std::cout << tree << "\n";
tree.insert(45); std::cout << tree << "\n";
tree.insert(75); std::cout << tree << "\n";

AvlTree<uint32_t>::Handle handle = tree.find(75);
std::cout << "find:" << *handle << "\n";

tree.erase(50); std::cout << tree << "\n";
tree.erase(25); std::cout << tree << "\n";
tree.erase(10); std::cout << tree << "\n";
tree.erase(40); std::cout << tree << "\n";
tree.erase(30); std::cout << tree << "\n";
tree.erase(45); std::cout << tree << "\n";
tree.erase(75); std::cout << tree << "\n";
}

// outer rotate check
{
AvlTree<uint32_t> tree;

tree.insert(15); std::cout << tree << "\n";
tree.insert(77); std::cout << tree << "\n";
tree.insert(35); std::cout << tree << "\n";

// pair erase check
tree.erase( 1); std::cout << tree << "\n";
tree.erase(35); std::cout << tree << "\n";
tree.erase(15); std::cout << tree << "\n";
tree.erase(77); std::cout << tree << "\n";
tree.erase( 1); std::cout << tree << "\n";
}

{
AvlTree<uint32_t> tree;

tree.insert(93); std::cout << tree << "\n";
tree.insert(86); std::cout << tree << "\n";
tree.insert(87); std::cout << tree << "\n";

// pair erase check
tree.erase(99); std::cout << tree << "\n";
tree.erase(86); std::cout << tree << "\n";
tree.erase(87); std::cout << tree << "\n";
tree.erase(93); std::cout << tree << "\n";
tree.erase(99); std::cout << tree << "\n";
}
}

void randomTreeTest()
{
AvlTree<uint32_t> tree;

for (int i = 0; i < (4096-1); ++i)
{
uint32_t value = std::rand() % 100;
// std::cout << value << " ";
tree.insert(value);
}
std::cout << "\n";

// frequency count the nodes at each layer..
// thought it was ironic to use AvlTree as the map also.. just to prove that its flexible
AvlMap<int,int> freq;
for (AvlTree<uint32_t>::Iterator it = tree.begin();
it != tree.end();
++it)
{
++(freq[it.height()]);
}
std::cout << "\n";

// dump it
for (AvlMap<int,int>::Iterator it = freq.begin();
it != freq.end();
++it)
{
std::cout << it->first << ":" << it->second  << " ";
}
std::cout << "\n";
}

int main()
{
test();

randomTreeTest();
}



And the output looks like:
50(1)
25(1) 50(2)
10(1) 25(2) 50(1)
10(1) 25(3) 40(1) 50(2)
10(1) 25(3) 30(1) 40(2) 50(1)
10(1) 25(2) 30(1) 40(3) 45(1) 50(2)
10(1) 25(2) 30(1) 40(3) 45(1) 50(2) 75(1)
find:75
10(1) 25(2) 30(1) 40(3) 45(1) 75(2)
10(1) 30(2) 40(3) 45(1) 75(2)
30(1) 40(3) 45(1) 75(2)
30(1) 45(2) 75(1)
45(2) 75(1)
75(1)

15(1)
15(2) 77(1)
15(1) 35(2) 77(1)
15(1) 35(2) 77(1)
15(1) 77(2)
77(1)

93(1)
86(1) 93(2)
86(1) 87(2) 93(1)
86(1) 87(2) 93(1)
87(2) 93(1)
93(1)

1:1992 2:995 3:518 4:266 5:129 6:90 7:45 8:26 9:16 10:8 11:5 12:2 13:2 14:1