// http://www.impactinterview.com/2009/10/140-google-interview-questions/
// Given a function which produces a random integer in the
// range 1 to 5, write a function which produces a random
// integer in the range 1 to 7.
#include <cstring>
#include <cstdlib>
#include <iostream>
#include <functional>
#include <chrono>
// Note stuff the [1,5] thats just +1. -1 everywhere and a waste of time lets go [0,4]
int rand5()
{
return std::rand() % 5;
}
// ok so the problem is that you need to combine the rand5 to make rand7
// the question doesnt say anything about *uniform* distribution so in theroy
// u just call rand5 2 times and mod 7.. lets call this the weasel solution
// cause the question was badly written
int rand7_weasel()
{
return (rand5() + rand5()) % 7;
}
// ok so now the questioner has realized his mistake and corrected.
// a "uniform" rand7() generator
//
// The above is not uniform because if use two 6 sided dice and added the rolls
// the most common number is 7. Ie 7 has the most number of combos that add to it.
//
// So lets simplify and assume for the moment we have a more natural problem
// we can generate digits [0,9] uniformly and need to create a uniform number
// from [0,999].. well the answer is much more clear now you generate 3 numbers
// and use them as the digits of the final number
//
// The same idea works for the [0,4], we have 5 numbers so we simply use base 5
// instead of base 10. This way we can make a very large uniform distributed number
//
// once we have a very large range of uniformly distributed numbers we can module
// it by a much smaller number the binning error will be negligible
// lets minimize the binning error by making the max number possible (overflow it once)
//
// 2^32 = 5^N
// 32 log2 = N log5
// N = 13.78
int rand7_overkill()
{
unsigned int sum = 0;
for (int i = 0; i < 14; i++)
sum = (sum*5) + rand5();
return sum % 7;
}
// of course if you know your maths the central limit theorem says that for most distributions
// when you take many independent samples and summate them the resulting distribution will
// approach a another stable one.. for uniform distributions thats the normal distribution.
// refer to: http://demonstrations.wolfram.com/CentralLimitTheoremForTheContinuousUniformDistribution/
//
// Then add to this the fact that a small enough modulus on a normal distribution (much
// much less that its variance) will make the resulting distribution look somewhat uniform
// ie the wrapped normal distribution where sigma^2 approaches infinite
// refer to: https://en.wikipedia.org/wiki/Wrapped_normal_distribution
//
// So technically this works.. but a smart interviewer is going to ask you why it works.. and
// your likely to choke on it.. (i know the maths would kill me during an interview)
int rand7_central_limit_weasel()
{
unsigned int sum = 0;
for (int i = 0; i < 14; i++)
sum += rand5();
return sum % 7;
}
// ok so now we have a working solution.. but guess what your interviewing for google..
// and they want to curve ball it.. so they ask the following.. can you improve the
// performance?.. looping 14 times is too much.. a very "innocent" question
//
// now you might think ok well just reduce the number of iterations it that does it..
// but that is not really going to work well
//
// So here is the trick.. the real question is about testing your knowledge of pseudo
// random generators.. so just build one.. now a pseudo random generator isn't anything
// special but you have to hack one out in an interview and i don't have primes. pi and
// what not memorized by heart...
//
// What i do know is how to generate a uniform distribution that ranges from [0,infinite]..
// so all u have to do is make one big continuous rolling uniform number from the above code
// and then sample from it as needed..
//
// now this does raise some questions about the independence of the samples... but its faster...
int rand7_pseudo()
{
// inject a the random sample to seeded the hash
static unsigned int sum = rand5();
// the uniform sum on with the prior...
sum = (sum*5) + rand5();
return sum % 7;
}
void test(std::function<int()> randy)
{
int freq[7] = {0};
std::memset(freq, 0, sizeof(freq));
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < 100000; ++i)
{
++freq[randy()];
}
auto finish = std::chrono::steady_clock::now();
double elapsed_seconds = std::chrono::duration_cast<
std::chrono::duration<double> >(finish - start).count();
for (int i=0;i<7;++i)
{
std::cout << "\n " << i << ":" << freq[i];
}
std::cout << "\n speed:" << elapsed_seconds
<< "\n";
}
int main()
{
test(rand7_weasel);
test(rand7_overkill);
test(rand7_central_limit_weasel);
test(rand7_pseudo);
}
and output looks like this:
0:11949 1:12073 2:11885 3:16060 4:19993 5:16024 6:12016 speed:0.0190048 0:14130 1:14541 2:14177 3:14296 4:14157 5:14400 6:14299 speed:0.056561 0:14264 1:14538 2:14425 3:14064 4:14476 5:14076 6:14157 speed:0.0409873 0:14272 1:14230 2:14308 3:14110 4:14214 5:14422 6:14444 speed:0.0131068
No comments:
Post a Comment