//compile using g++
#include <iostream>
#include <algorithm>
#include <iterator>
#include <vector>
#include <set>
//#include <unordered_map>
using namespace std;
template<class Data>
class ReservoirSampler
{
vector<Data>* samples;
int size;
int count;
public:
vector<Data>* getSamples()
{ return samples; }
ReservoirSampler(int _size)
{
count = 0;
size = _size;
samples = new vector<Data>();
samples->resize(_size);
}
~ReservoirSampler()
{}
void operator()(Data sample)
{
if(count < size)
(*samples)[count] = sample;
else if((rand()%count) < size)
(*samples)[rand()%size] = sample;
++count;
}
};
template<class Data>
class RandomSamplerSet : public set<Data>
{
public:
void add(Data data)
{
//O(logN)
insert(data);
}
void remove(Data data)
{
//O(logN)
erase(data);
}
vector<Data>* sample(int k)
{
//O(N)
ReservoirSampler<Data> sampler(k);
for_each(this->begin(), this->end(), sampler);
return sampler.getSamples();
}
};
#define LIMIT 1000
void createRandSamples(RandomSamplerSet<int>* container, int size)
{
for(int i = 0; i < size; i++)
container->add(rand()%LIMIT);
}
template<class itor>
void printOut(itor start, itor end)
{
// *copy* the data into the output stream
copy (start, end, ostream_iterator<int>(cout, " "));
cout << endl;
}
#define SIZE 20
#define SAMPLES 5
int main()
{
srand(time(NULL));
RandomSamplerSet<int> data;
createRandSamples(&data, SIZE);
printOut(data.begin(), data.end());
vector<int>* samples = data.sample(SAMPLES);
printOut(samples->begin(), samples->end());
delete samples;
}
Thursday, June 17, 2010
Reservoir Sampling -- for an stl container..
Cant sleep.. Here is the Reservoir sampler rebuild for an stl container I assume you can see how to extend it to whatever STL compliant container is desired.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment