## Saturday, July 2, 2016

### google interview question: build a random generator

// http://www.impactinterview.com/2009/10/140-google-interview-questions/
// Given a function which produces a random integer in the
// range 1 to 5, write a function which produces a random
// integer in the range 1 to 7.

#include <cstring>
#include <cstdlib>
#include <iostream>
#include <functional>
#include <chrono>

// Note stuff the [1,5] thats just +1. -1 everywhere and a waste of time lets go [0,4]

int rand5()
{
return std::rand() % 5;
}

// ok so the problem is that you need to combine the rand5 to make rand7
// the question doesnt say anything about *uniform* distribution so in theroy
// u just call rand5 2 times and mod 7.. lets call this the weasel solution
// cause the question was badly written

int rand7_weasel()
{
return (rand5() + rand5()) % 7;

}

// ok so now the questioner has realized his mistake and corrected.
// a "uniform" rand7() generator
//
// The above is not uniform because if use two 6 sided dice and added the rolls
// the most common number is 7. Ie 7 has the most number of combos that add to it.
//
// So lets simplify and assume for the moment we have a more natural problem
// we can generate digits [0,9] uniformly and need to create a uniform number
// from [0,999].. well the answer is much more clear now you generate 3 numbers
// and use them as the digits of the final number
//
// The same idea works for the [0,4], we have 5 numbers so we simply use base 5
// instead of base 10. This way we can make a very large uniform distributed number
//
// once we have a very large range of uniformly distributed numbers we can module
// it by a much smaller number the binning error will be negligible

// lets minimize the binning error by making the max number possible (overflow it once)
//
// 2^32 = 5^N
// 32 log2 = N log5
// N = 13.78

int rand7_overkill()
{
unsigned int sum = 0;
for (int i = 0; i < 14; i++)
sum = (sum*5) + rand5();

return sum % 7;
}

// of course if you know your maths the central limit theorem says that for most distributions
// when you take many independent samples and summate them the resulting distribution will
// approach a another stable one.. for uniform distributions thats the normal distribution.
//  refer to: http://demonstrations.wolfram.com/CentralLimitTheoremForTheContinuousUniformDistribution/
//
// Then add to this the fact that a small enough modulus on a normal distribution (much
// much less that its variance) will make the resulting distribution look somewhat uniform
//  ie the wrapped normal distribution where sigma^2 approaches infinite
//  refer to: https://en.wikipedia.org/wiki/Wrapped_normal_distribution
//
// So technically this works.. but a smart interviewer is going to ask you why it works.. and
// your likely to choke on it.. (i know the maths would kill me during an interview)

int rand7_central_limit_weasel()
{
unsigned int sum = 0;
for (int i = 0; i < 14; i++)
sum += rand5();

return sum % 7;
}

// ok so now we have a working solution.. but guess what your interviewing for google..
// and they want to curve ball it.. so they ask the following.. can you improve the
// performance?.. looping 14 times is too much.. a very "innocent" question
//
// now you might think ok well just reduce the number of iterations it that does it..
// but that is not really going to work well
//
// So here is the trick.. the real question is about testing your knowledge of pseudo
// random generators.. so just build one.. now a pseudo random generator isn't anything
// special but you have to hack one out in an interview and i don't have primes. pi and
// what not memorized by heart...
//
// What i do know is how to generate a uniform distribution that ranges from [0,infinite]..
// so all u have to do is make one big continuous rolling uniform number from the above code
// and then sample from it as needed..
//
// now this does raise some questions about the independence of the samples... but its faster...

int rand7_pseudo()
{
// inject a the random sample to seeded the hash
static unsigned int sum = rand5();

// the uniform sum on with the prior...
sum = (sum*5) + rand5();

return sum % 7;
}

void test(std::function<int()> randy)
{
int freq[7] = {0};
std::memset(freq, 0, sizeof(freq));

auto start = std::chrono::steady_clock::now();

for (int i = 0; i < 100000; ++i)
{
++freq[randy()];
}

auto finish = std::chrono::steady_clock::now();
double elapsed_seconds = std::chrono::duration_cast<
std::chrono::duration<double> >(finish - start).count();

for (int i=0;i<7;++i)
{
std::cout << "\n " << i << ":" << freq[i];
}
std::cout << "\n speed:" << elapsed_seconds
<< "\n";
}

int main()
{
test(rand7_weasel);
test(rand7_overkill);

test(rand7_central_limit_weasel);
test(rand7_pseudo);
}


and output looks like this:

 0:11949
1:12073
2:11885
3:16060
4:19993
5:16024
6:12016
speed:0.0190048

0:14130
1:14541
2:14177
3:14296
4:14157
5:14400
6:14299
speed:0.056561

0:14264
1:14538
2:14425
3:14064
4:14476
5:14076
6:14157
speed:0.0409873

0:14272
1:14230
2:14308
3:14110
4:14214
5:14422
6:14444
speed:0.0131068