Sunday, January 1, 2012

Code Enablement control

I recently re-encountered an interesting design problem: the requirement to roll out code with the ability to quickly and globally disable it at run time.

Systems like Google's search engine are 100% online; they use the idea of machine sets and stages, where they roll out new versions of the code, boot up the system, and then enable/disable parts of the system as they trial the new algorithms.

So how can this be done? Well, there are many approaches, but they probably break down into two main categories:

  • Enablement Switches
  • Runtime replaceable code parts.
An Enable Switch is a boolean or value test that filters entry into specific region(s) of the code.
  • Advantage:
    1. micro level Control
  • Disadvantage:
    1. Generally this requires constantly checking a bool in a hash or in shared memory.
    2. The code is statically linked, so if it's wrong you will need to replace the whole process's code.
Runtime replaceable code can be anything from whole processes to dynamic libs or some other plugin system.
  • Advantage:
    1. Dynamically able to add/replace libs at runtime when things go wrong or a clear tweak is visible.
  • Disadvantage:
    1. It is macro level, i.e. it requires larger chunks of code to be loaded/unloaded.
    2. It can't handle extensive interface changes across the plugin interface.
Ideally, the solution is probably a mix of the above. So the bit Enablement system needs to be dynamically able to expand and register new access keys as needed. This also accounts for process variations over heterogeneous machines and dynamically replaced code. Basically, the following solution is a shared-memory hash implementation that can be accessed and loaded from independent processes. Be careful with it, as I have been a bit lazy with its mutex locking and it will probably have some concurrency issues. Also keep in mind that it's a hash, so it's more efficient to over-allocate the hash size relative to the actual number of used keys. As always, there are some other problems with the code, but you can work them out for an actual production implementation. Anyway, here is my hacked-up prototype of the system.
// compile with:
// g++ -I"c:\tools\boost_1_45_0" -L"c:\tools\boost_1_45_0\stage\lib" -static enable_control_flag.cpp -o enable_control_flag.exe

//#include <boost/interprocess/shared_memory_object.hpp>
#include <boost/interprocess/windows_shared_memory.hpp>
#include <boost/interprocess/mapped_region.hpp>
//#include <boost/thread/thread.hpp>
#include <boost/functional/hash.hpp>

#include <cstring>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string>

// uint32_t (the C header also works in pre-C++11 builds)
#include <stdint.h>

// for windows Sleep!
#include <Windows.h>

#define KEY_MAX 64

// One named on/off switch. The record is a plain chunk of data that is
// overlaid on shared memory, so the member order and sizes below are the
// on-the-wire layout shared between processes -- do not reorder them.
struct EnableControlSwitch
{
  // Produces an empty record: no hash, switched off, empty key.
  // Initializers are listed in declaration order (hash_, state_, key_) so the
  // written order matches the order the compiler actually runs them in.
  EnableControlSwitch() :
    hash_(0),
    state_(false),
    key_()      // value-initialize: every byte of the key buffer becomes '\0'
  {}

  uint32_t      hash_;          // hash of the key; 0 means "slot not in use"
  bool          state_;         // true = enabled, false = disabled
  char          key_[KEY_MAX];  // NUL-terminated key, at most KEY_MAX-1 chars
  //expiry date...
  //creator process... so you can track where it's coming from.
};

using namespace boost::interprocess;
class EnableControl
{
public:
  enum { 
    MAX_ENTRIES = 256,   //best to keep this as a power of 2 for speed
    MEM_SIZE     = (MAX_ENTRIES*sizeof(EnableControlSwitch))
  };
  
  static EnableControl& instance() 
  { 
    static EnableControl me; 
    return me;
  }

  EnableControlSwitch& create(const std::string& key)  { return get(key, true); }
  EnableControlSwitch& get(const std::string& key,
      bool create = false)
  {
    //locate or register
    if(key.length() > KEY_MAX-1) throw std::runtime_error("Key to big");

    uint32_t hash = boost::hash_value(key);
    uint32_t loc  = hash;
    
    while (hash != 0)
      {
 uint32_t idx = loc % MAX_ENTRIES; 
 if(switches_[idx].hash_ == 0)
   {
     if (!create)
       throw std::runtime_error("Unknown Key");
     
     //free location!
     //lockless version... assume atomic...
     switches_[idx].hash_ = hash;
     if(switches_[idx].hash_ == hash)
       {
  //proceed
  std::memcpy(switches_[idx].key_, 
       key.c_str(), 
       key.length());
  switches_[idx].key_[key.length()] = '\0'; 
  return switches_[idx];
       }
   }
 else if(switches_[idx].hash_ == hash)
   {
     return switches_[idx];
   }
 else
   {
     //nothing went wrong and it was occupied.. next possible place
     loc = loc / MAX_ENTRIES;
   }
      }
    
    throw std::runtime_error("To many key conflicts");
  }

  bool& state(const std::string& key)
  {
    EnableControlSwitch& aSwitch = get(key);
    return aSwitch.state_;
  }

  void enable(const std::string key)  { state(key) = true; }
  void disable(const std::string key) { state(key) = false; }

  std::ostream& printAll(std::ostream& out) const
  {
    for(uint32_t idx = 0; idx < MAX_ENTRIES; idx++)
      {
 if(switches_[idx].hash_ != 0)
   out << std::hex
       << "Idx:"    << idx
       << " Hash:"  << switches_[idx].hash_
       << " Key:"   << switches_[idx].key_
       << " State:" << switches_[idx].state_
       << "\n";
      }
    return out;
  }
  
private:
  EnableControl() :
    shm_(NULL),
    region_(NULL)
  {
    //setup shared mem
    shm_ = new windows_shared_memory(open_or_create, "SharedEnableControls", read_write, MEM_SIZE);
    region_ = new mapped_region(*shm_, read_write);
    std::memset(region_->get_address(), 0, region_->get_size());
    switches_ = static_cast<EnableControlSwitch*>(region_->get_address());
  }

  ~EnableControl()
  {
    delete region_;
    delete shm_;
  }

  windows_shared_memory* shm_;
  mapped_region*         region_;
  EnableControlSwitch*   switches_;
};

// Stream insertion for the registry: delegates to printAll(), which lists the
// registered switches, and hands the same stream back for chaining.
std::ostream& operator<<(std::ostream& out, EnableControl const& control)
{
  control.printAll(out);
  return out;
}

void master()
{
  //should use an allocator... but lets keep it simple for now...
  EnableControl& ctrl = EnableControl::instance();

  const EnableControlSwitch& a = ctrl.create("a"); //high speed Enable point (share mem ref copy)
  const EnableControlSwitch& b = ctrl.create("b"); 
  const EnableControlSwitch& c = ctrl.create("c"); 
  const EnableControlSwitch& exit = ctrl.create("exit"); 

  while (!exit.state_)
    {
      try 
 {
   std::cout << EnableControl::instance();
      
   std::cout << "sleeping..\n";
   //boost::this_thread::sleep(boost::posix_time::seconds(1));
   Sleep(1000);

   if(a.state_)
       std::cout << "a\n";
   if(b.state_)
       std::cout << "b\n";
   if(c.state_)
       std::cout << "c\n";
 }
      catch(std::exception& e)
 {
   std::cout << e.what();
 }
    }    
}

// Demo "slave" process: runs one command against the shared registry.
//
// cmd - one of:
//         "status"  - print every registered switch
//         "enable"  - switch `key` on
//         "disable" - switch `key` off
//         "info"    - print whether `key` is ON or OFF
// key - switch name; required for enable/disable/info, ignored for status.
//
// Never throws: any std::exception is reported on stdout as "Error: <what>".
void slave(const std::string& cmd, const std::string& key)
{
  try
    {
      const bool needsKey =
        (cmd == "enable" || cmd == "disable" || cmd == "info");

      if(needsKey && key.empty())
        {
          // Report the real problem instead of letting the lookup of an
          // empty key fail with an unrelated-looking message.
          std::cout << "Error: Command:" << cmd << " requires a key\n";
          return;
        }

      if(cmd == "status")
        std::cout << EnableControl::instance();
      else if(cmd == "enable")
        EnableControl::instance().enable(key);
      else if(cmd == "disable")
        EnableControl::instance().disable(key);
      else if(cmd == "info")
        std::cout << "Key:" << key << " is "
                  << (EnableControl::instance().state(key) ? "ON" : "OFF")
                  << "\n";
      else
        std::cout << "Unknown Command:" << cmd << " Key:" << key << "\n";
    }
  catch(const std::exception& e)
    {
      std::cout << "Error: " << e.what() << "\n";
    }
}

// Entry point of the two-process demo.
//
//   no arguments   -> run as the master (registers switches, polls them)
//   <cmd>          -> run as a slave with an empty key (e.g. "status")
//   <cmd> <key>    -> run as a slave with the given key
//
// Returns 0 on normal completion and 1 if an exception escapes, e.g. when the
// master cannot attach to the shared memory or register its switches.
int main(int argc, char const * const *argv)
{
  try
    {
      //this is a 2 process system
      // master is run with no parameters
      // slave is one of the commands listed above + the optional key.
      if (argc <= 1)          // also covers an empty argv (argc == 0)
        master();
      else if (argc == 2)
        slave(argv[1],"");
      else
        slave(argv[1],argv[2]);
    }
  catch(const std::exception& e)
    {
      std::cerr << "Fatal: " << e.what() << "\n";
      return 1;
    }

  return 0;
}
Some improvements that come to mind are:
  1. At compile time, compute the string hash using boost::mpl::string, to squeeze out some extra milliseconds at start-up. But that is serious overkill IMHO; it would be better to improve the hash collision algorithm.
  2. Add the ability to delete hash entries.
  3. Add the ability to add pages for switches so that the compile time switch limit can be exceeded.
  4. A way to cleanly initialize the switch system.