Thursday, June 17, 2010

Reservoir Sampling -- for an stl container..

Cant sleep.. Here is the Reservoir sampler rebuild for an stl container I assume you can see how to extend it to whatever STL compliant container is desired.

//compile using g++
#include <iostream>
#include <algorithm>
#include <iterator>

#include <vector>
#include <set>
//#include <unordered_map> 

using namespace std;

template<class Data>
class ReservoirSampler
{
  vector<Data>* samples;
  int size;
  int count;

public:
  vector<Data>* getSamples()
  { return samples;  }
  
  ReservoirSampler(int _size)
  {
    count = 0;
    size  = _size;
    samples = new vector<Data>();
    samples->resize(_size);
  }

  ~ReservoirSampler()
  {}

  void operator()(Data sample)
  {
    if(count < size)
      (*samples)[count] = sample;
    else if((rand()%count) < size)
      (*samples)[rand()%size] = sample;
    ++count;
  }
};

template<class Data>
class RandomSamplerSet : public set<Data>
{
public:
  void add(Data data)
  { 
    //O(logN)
    insert(data); 
  }
  
  void remove(Data data)
  {
    //O(logN)
    erase(data);
  }
  
  vector<Data>* sample(int k)
  {
    //O(N)
    ReservoirSampler<Data> sampler(k);
    for_each(this->begin(), this->end(), sampler);
    return sampler.getSamples();
  }
};

#define LIMIT 1000
void createRandSamples(RandomSamplerSet<int>* container, int size)
{
  for(int i = 0; i < size; i++)
    container->add(rand()%LIMIT);
}

template<class itor>
void printOut(itor start, itor end)
{
  // *copy* the data into the output stream
  copy (start, end, ostream_iterator<int>(cout, " ")); 
  cout << endl;
}

#define SIZE    20
#define SAMPLES 5
int main()
{
  srand(time(NULL));

  RandomSamplerSet<int> data;
  createRandSamples(&data, SIZE); 
  printOut(data.begin(), data.end());

  vector<int>* samples = data.sample(SAMPLES);
  printOut(samples->begin(), samples->end());
  delete samples;
}

No comments:

Post a Comment