//compile using g++ #include <iostream> #include <algorithm> #include <iterator> #include <vector> #include <set> //#include <unordered_map> using namespace std; template<class Data> class ReservoirSampler { vector<Data>* samples; int size; int count; public: vector<Data>* getSamples() { return samples; } ReservoirSampler(int _size) { count = 0; size = _size; samples = new vector<Data>(); samples->resize(_size); } ~ReservoirSampler() {} void operator()(Data sample) { if(count < size) (*samples)[count] = sample; else if((rand()%count) < size) (*samples)[rand()%size] = sample; ++count; } }; template<class Data> class RandomSamplerSet : public set<Data> { public: void add(Data data) { //O(logN) insert(data); } void remove(Data data) { //O(logN) erase(data); } vector<Data>* sample(int k) { //O(N) ReservoirSampler<Data> sampler(k); for_each(this->begin(), this->end(), sampler); return sampler.getSamples(); } }; #define LIMIT 1000 void createRandSamples(RandomSamplerSet<int>* container, int size) { for(int i = 0; i < size; i++) container->add(rand()%LIMIT); } template<class itor> void printOut(itor start, itor end) { // *copy* the data into the output stream copy (start, end, ostream_iterator<int>(cout, " ")); cout << endl; } #define SIZE 20 #define SAMPLES 5 int main() { srand(time(NULL)); RandomSamplerSet<int> data; createRandSamples(&data, SIZE); printOut(data.begin(), data.end()); vector<int>* samples = data.sample(SAMPLES); printOut(samples->begin(), samples->end()); delete samples; }
Thursday, June 17, 2010
Reservoir Sampling -- for an stl container..
Cant sleep.. Here is the Reservoir sampler rebuild for an stl container I assume you can see how to extend it to whatever STL compliant container is desired.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment