// http://www.impactinterview.com/2009/10/140-google-interview-questions/ // Given a function which produces a random integer in the // range 1 to 5, write a function which produces a random // integer in the range 1 to 7. #include <cstring> #include <cstdlib> #include <iostream> #include <functional> #include <chrono> // Note stuff the [1,5] thats just +1. -1 everywhere and a waste of time lets go [0,4] int rand5() { return std::rand() % 5; } // ok so the problem is that you need to combine the rand5 to make rand7 // the question doesnt say anything about *uniform* distribution so in theroy // u just call rand5 2 times and mod 7.. lets call this the weasel solution // cause the question was badly written int rand7_weasel() { return (rand5() + rand5()) % 7; } // ok so now the questioner has realized his mistake and corrected. // a "uniform" rand7() generator // // The above is not uniform because if use two 6 sided dice and added the rolls // the most common number is 7. Ie 7 has the most number of combos that add to it. // // So lets simplify and assume for the moment we have a more natural problem // we can generate digits [0,9] uniformly and need to create a uniform number // from [0,999].. well the answer is much more clear now you generate 3 numbers // and use them as the digits of the final number // // The same idea works for the [0,4], we have 5 numbers so we simply use base 5 // instead of base 10. This way we can make a very large uniform distributed number // // once we have a very large range of uniformly distributed numbers we can module // it by a much smaller number the binning error will be negligible // lets minimize the binning error by making the max number possible (overflow it once) // // 2^32 = 5^N // 32 log2 = N log5 // N = 13.78 int rand7_overkill() { unsigned int sum = 0; for (int i = 0; i < 14; i++) sum = (sum*5) + rand5(); return sum % 7; } // of course if you know your maths the central limit theorem says that for most distributions // when you take many independent samples and summate them the resulting distribution will // approach a another stable one.. for uniform distributions thats the normal distribution. // refer to: http://demonstrations.wolfram.com/CentralLimitTheoremForTheContinuousUniformDistribution/ // // Then add to this the fact that a small enough modulus on a normal distribution (much // much less that its variance) will make the resulting distribution look somewhat uniform // ie the wrapped normal distribution where sigma^2 approaches infinite // refer to: https://en.wikipedia.org/wiki/Wrapped_normal_distribution // // So technically this works.. but a smart interviewer is going to ask you why it works.. and // your likely to choke on it.. (i know the maths would kill me during an interview) int rand7_central_limit_weasel() { unsigned int sum = 0; for (int i = 0; i < 14; i++) sum += rand5(); return sum % 7; } // ok so now we have a working solution.. but guess what your interviewing for google.. // and they want to curve ball it.. so they ask the following.. can you improve the // performance?.. looping 14 times is too much.. a very "innocent" question // // now you might think ok well just reduce the number of iterations it that does it.. // but that is not really going to work well // // So here is the trick.. the real question is about testing your knowledge of pseudo // random generators.. so just build one.. now a pseudo random generator isn't anything // special but you have to hack one out in an interview and i don't have primes. pi and // what not memorized by heart... // // What i do know is how to generate a uniform distribution that ranges from [0,infinite].. // so all u have to do is make one big continuous rolling uniform number from the above code // and then sample from it as needed.. // // now this does raise some questions about the independence of the samples... but its faster... int rand7_pseudo() { // inject a the random sample to seeded the hash static unsigned int sum = rand5(); // the uniform sum on with the prior... sum = (sum*5) + rand5(); return sum % 7; } void test(std::function<int()> randy) { int freq[7] = {0}; std::memset(freq, 0, sizeof(freq)); auto start = std::chrono::steady_clock::now(); for (int i = 0; i < 100000; ++i) { ++freq[randy()]; } auto finish = std::chrono::steady_clock::now(); double elapsed_seconds = std::chrono::duration_cast< std::chrono::duration<double> >(finish - start).count(); for (int i=0;i<7;++i) { std::cout << "\n " << i << ":" << freq[i]; } std::cout << "\n speed:" << elapsed_seconds << "\n"; } int main() { test(rand7_weasel); test(rand7_overkill); test(rand7_central_limit_weasel); test(rand7_pseudo); }

and output looks like this:

0:11949 1:12073 2:11885 3:16060 4:19993 5:16024 6:12016 speed:0.0190048 0:14130 1:14541 2:14177 3:14296 4:14157 5:14400 6:14299 speed:0.056561 0:14264 1:14538 2:14425 3:14064 4:14476 5:14076 6:14157 speed:0.0409873 0:14272 1:14230 2:14308 3:14110 4:14214 5:14422 6:14444 speed:0.0131068

## No comments:

## Post a Comment