tag:blogger.com,1999:blog-6121718088907075532024-03-05T23:54:58.775+09:00The Coding Slim JimAhh.. who gave the code-monkey a wrench?Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.comBlogger271125tag:blogger.com,1999:blog-612171808890707553.post-60058078755873624252017-08-17T23:57:00.002+09:002017-08-18T00:03:31.968+09:00Q learning - and its core flaws<style>
.inline li pre {
display: inline;
}
.inline li div {
display: inline;
}
.inline li p {
display: inline;
}
</style><br />
<br />
<script src="https://d3js.org/d3.v3.min.js"></script><br />
<script>
// **************************** tree render/animator handles
// Build a collapsible d3 (v3) tree inside `outer` and return handles for
// driving it from the animation scripts below.
//   outer   : d3 selection (an <svg>) to draw into
//   data    : root of the tree data ({value, children/_children, mark, ...})
//   effects : optional map of effect-name -> polygon points for node badges
// Returns {root, group, show_hide, highlight, change_text}.
function make_tree(outer, data, effects)
{
var height = 500;
var width = 450;
var id = 0;          // next unique node id, assigned lazily in the data join
var duration = 750;  // transition length in ms
var radius = 30;     // node circle radius
var root;
var tree = d3.layout.tree()
.size([height, width]);
var diagonal = d3.svg.diagonal()
.projection(function(d) { return [d.x, d.y]; });
var treeGroup = outer.append("g")
.attr("transform", function(d) { return "translate(50,50)"; });
// Recompute the layout and transition nodes/links to their new state.
// `source` is the node the change originated from; exiting elements
// collapse toward it.
function update(source)
{
// Compute the new tree layout.
var nodes = tree.nodes(root).reverse(),
links = tree.links(nodes);
// Normalize for fixed-depth.
nodes.forEach(function(d) { d.y = d.depth * 180; });
// Update the nodes…
var node = treeGroup.selectAll("g.node")
.data(nodes, function(d) { return d.id || (d.id = ++id); });
// Enter any new nodes at the parent's previous position.
// NOTE(review): no initial "transform" is set on entering groups, so new
// nodes slide in from (0,0) rather than from the parent — confirm intent.
var nodeEnter = node.enter().append("g")
.attr("class", "node")
.on("click", show_hide);
nodeEnter.append("circle")
.attr("r", 1e-6)
// NOTE(review): <circle> positions via cx/cy; this "y" attribute looks
// like a no-op — confirm it can be dropped.
.attr("y", 30)
.style("fill", function(d) { return d._children ? "lightsteelblue" : "#fff"; });
// Optional badge polygon per node, looked up by d.effect.
if (typeof(effects) !== 'undefined')
{
nodeEnter.append("polygon")
.attr("points",function(d) {
if ( effects[d.effect] != undefined)
{
return effects[d.effect].map(function(p) {
return [p.x,p.y].join(","); }).join(" ");
}
return "";
})
//.style("stroke", "steelblue")
//.style("stroke-width", "3px")
//.attr("fill", "none");
.attr("fill", function(d) { return d.mark ? "#ce0022" : "steelblue" });
}
// Node label (the "reward / Q" text), centered in the circle.
nodeEnter.append("text")
.attr("dx", 0)
.attr("dy", ".35em")
.attr("text-anchor", "middle")
.text(function(d) { return d.value; })
.style("fill-opacity", 1e-6);
// Transition nodes to their new position.
var nodeUpdate = node.transition()
.duration(duration)
.attr("transform", function(d) { return "translate(" + d.x + "," + d.y + ")"; });
// d.mark toggles the red "highlighted" stroke/fill.
nodeUpdate.select("circle")
.attr("r", radius)
.style("stroke", function(d) { return d.mark ? "#ce0022" : "steelblue" })
.style("stroke-width", "3px")
.style("fill", function(d) { return d._children ? "lightsteelblue" : "#fff"; });
nodeUpdate.select("polygon")
.style("fill", function(d) { return d.mark ? "#ce0022" : "steelblue" });
nodeUpdate.select("text")
.text(function(d) { return d.value; })
.style("fill-opacity", 1);
// Transition exiting nodes to the parent's new position.
var nodeExit = node.exit().transition()
.duration(duration)
.attr("transform", function(d) { return "translate(" + source.x + "," + source.y + ")"; })
.remove();
nodeExit.select("circle")
.attr("r", 1e-6);
nodeExit.select("text")
.text(function(d) { return d.value; })
.style("fill-opacity", 1e-6);
// Update the links…
var link = treeGroup.selectAll("path.link")
.data(links, function(d) { return d.target.id; });
// Enter any new links at the parent's previous position.
link.enter().insert("path", "g")
.attr("class", "link")
.style("stroke", "#ccc")
.style("stroke-width", "2px")
.style("fill", "none")
.attr("d", function(d) {
var o = {x: source.x0, y: source.y0};
return diagonal({source: o, target: o});
});
// Transition links to their new position.
link.transition()
.duration(duration)
.attr("d", diagonal);
// Transition exiting nodes to the parent's new position.
link.exit().transition()
.duration(duration)
.attr("d", function(d) {
var o = {x: source.x, y: source.y};
return diagonal({source: o, target: o});
})
.remove();
// Stash the old positions for transition.
nodes.forEach(function(d) {
d.x0 = d.x;
d.y0 = d.y;
});
}
// Collapse/expand a node by swapping children <-> _children.
function show_hide(d) {
if (d.children) {
d._children = d.children;
d.children = null;
} else {
d.children = d._children;
d._children = null;
}
update(d);
}
// Toggle the red highlight mark on a node.
function highlight(d) {
if (d.mark) {
d.mark = false;
} else {
d.mark = true;
}
update(d);
}
// Replace a node's label text.
function change_text(d, value) {
d.value = value;
update(d);
}
// Initial render from the supplied data.
root = data;
root.x0 = width / 2;
root.y0 = 200;
update(root);
return {root: root,
group: treeGroup,
show_hide: show_hide,
highlight: highlight,
change_text: change_text};
}
// Install simple play/step controls for a list of animation steps.
//   outer         : d3 selection to append the controls group to
//   animate_steps : array of {action: fn, timeOut: ms} entries, run in order
function install_animate(outer, animate_steps)
{
    var size_c = 15;       // control glyph half-size
    var size_c0 = 5;       // bar width of the "step" glyph
    var space = 3;         // spacing between glyphs
    var minor_idx = 0;     // index of the current animation step
    var playing = false;   // true while auto-advancing on a timer
    var controlsLayer = null;

    // Advance one step while playing; stops playback at the end of the list.
    function step_forward()
    {
        console.log("step_forward");
        if (!playing) return;
        minor_idx += 1;
        if (minor_idx >= animate_steps.length)
        {
            minor_idx = animate_steps.length;
            playing = false;
            return;
        }
        update();
    }

    // Run the current step's action and, when playing, schedule the next one.
    function update()
    {
        console.log("update", minor_idx);
        // do the required action..
        animate_steps[minor_idx].action();
        if (playing)
        {
            console.log("timer", animate_steps[minor_idx].timeOut);
            var makeCallback = function() {
                // return a fresh callback each time so each d3.timer
                // instance gets its own one-shot function
                return function() {
                    step_forward();
                    return true;  // returning true ends this timer
                }
            };
            d3.timer(makeCallback(), animate_steps[minor_idx].timeOut);
            // fade the controls while the animation runs
            controlsLayer
                .style('opacity', 0.1);
        }
        else
        {
            controlsLayer
                .style('opacity', 0.8);
        }
    }

    // Toggle autoplay.
    function play()
    {
        console.log("play");
        playing = !playing;
        update();
    }

    // Manually advance a single step (pauses autoplay first).
    function minor_forward()
    {
        playing = false;
        if (minor_idx >= animate_steps.length)
        {
            minor_idx = animate_steps.length - 1;
        }
        update();
        minor_idx += 1;
    }

    // Control glyphs: a "play" triangle and a "step" bar-plus-triangle.
    var controls = [
        {i:0, action:play, shape:[[0,0],[size_c,size_c],[0,2*size_c]]},
        {i:1, action:minor_forward, shape:[
            [size_c0,0],[0,0],[0,2*size_c],[size_c0,2*size_c],[size_c0,0],
            [2*size_c0,0],[size_c+2*size_c0,size_c],[2*size_c0,2*size_c],[2*size_c0,0]]},
    ];
    // NOTE: the original computed off_x/off_y from the global width/height
    // here but never used them; that dead code (and the hidden dependency
    // on globals defined in a later <script>) has been removed.
    controlsLayer = outer.append("g")
        .style('opacity', 0.8)
        .attr("transform", "translate(0,0)");
    controlsLayer.selectAll("control")
        .data(controls)
        .enter()
        .append("polygon")
        .attr("points", function(d) {
            return d.shape.map(function(e) { return e.join(","); }).join(" ");
        })
        .on("click", function(d) { d.action(); })
        .attr("transform", function(d) { return "translate(" + ((2*d.i+1)*size_c + d.i*space) + ",0)" })
        .style("fill", "grey");
}
</script><br />
<br />
A few days ago i gave a talk on Reinforcement learning with a focus on Q-learning at Cookpads Tokyo office. https://www.meetup.com/tokyo-machine-learning-kitchen/events/242060161/<br />
<br />
The main slides for the talk are here https://github.com/ashleysmart/mlgym/blob/master/qlearning/main_slides.html<br />
<br />
I have been neglecting my blog lately so i figured i would convert the slides into a post so lets get started.<br />
<br />
<br />
The Q function is a estimate of the systems potential value. It is accessed based on:<br />
<ul><li> The environment or state that the system is in<br />
<li> The actions that can be taken from that state <br />
<li> The rewards that can be acquired by performing the action<br />
</ul>The Q function is the target function that we are trying to learn and it can be implemented as a neural network, table or some other standard linear regression machine learning system or function. The textbook formula: <center><pre class="render:latex">Q'(s_t, a_t)=Q(s_t,a_t)+\alpha\Big(r_t+\gamma\max_{a}Q(s_{t+1},a)-Q(s_t,a_t)\Big)</pre></center>Where: <ul class="inline"><li> <pre class="render:latex">Q</pre><p>: The function that guess the total 'value' of rewards</p><li> <pre class="render:latex">Q</pre><p>: The new iteration of the 'value'</p><li> <pre class="render:latex">s_t</pre><p>: The “State” of the environment at time 't'</p><li> <pre class="render:latex">a_t</pre><p>: The “action” perform at time 't'</p><li> <pre class="render:latex">r_t</pre><p>: The “reward” received for the action at 't'</p><li> <pre class="render:latex">s_{t+1}</pre><p>: The “State” of the environment after action at time 't'</p><li> <pre class="render:latex">a</pre><p>: A possible action performed from state 't+1'</p><li> <pre class="render:latex">\alpha</pre><p>: The learning rate, how quickly to adjust when wrong. This limited between 0 and 1</p><li> <pre class="render:latex">\gamma</pre><p>: The discount rate, how important/trusted future rewards are. This limited between 0 and 1. and has a effect that can be considered as a EMA(exponential moving average)</p></ul>Of course everyone understood that... If we manipulate the formula a bit how it works becomes much more clear Starting with the textbook form: <center><pre style="align:center" class="render:latex">Q'(s_t, a_t)=Q(s_t,a_t)+\alpha\Big(r_t+\gamma\max_{a}Q(s_{t+1},a)-Q(s_t,a_t)\Big)</pre></center>We notice that <pre class="render:latex">Q'(s_t, a_t)</pre>is in several places so we can group it together.. 
<center><pre class="render:latex">Q'(s_t, a_t)=(1-\alpha)Q(s_t, a_t)+\alpha\Big(r_t+\gamma\max_{a}Q(s_{t+1}, a)\Big)</pre></center>Then we can group the non-Q terms into a common item <center><pre class="render:latex">Q_{target}=r_t+\gamma\max_{a}Q(s_{t+1}, a)</pre></center>And finally we have something very clear <center><pre class="render:latex">Q_{new}=(1-\alpha)Q_{current}+\alpha Q_{target}</pre></center>As you can see alpha is acting as a ratio to merge the current value of Q with the target value of Q, folding the new information into the next iteration. Also, given that Q-learning mixes 2 numbers by percentage to produce a third, when learning is complete and stable all 3 parts will match, ie: <center><pre class="render:latex">Q_{target} \approx Q_{current} \approx Q_{new}</pre></center>So the core of what it learns is: <center><pre class="render:latex">Q_{final} \approx Q_{target} = r_t+\gamma\max_{a} Q(s_{t+1},a)</pre></center>This is just a recursive formula, and comp-sci guys will often instantly associate dynamic programming and tree diagrams with it. Ok so now lets have a look at how this works out solutions to problems. Each circle represents a world state; the number on the left is the instant reward for that state and the number on the right is the current Q value for that state. <div id="tree1"></div><script>
// Tree 1 data: an idealised search tree for the Q-learning walkthrough.
// Each node label is "<instant reward> / <Q value>"; "?" marks Q values
// the animation fills in bottom-up. The commented-out values are the
// final answers each step reveals.
var tree1 =
{
"value": "0 / ?",
"children":
[
{
"value": "+1 / ?", //"+1 / 3",
"children":
[
{
"value": "+1 / ?", //"+1 / 2"
"children":
[
{
"value": "-1 / -1"
},
{
"value": "0 / 0"
},
{
"value": "+1 / 1"
},
{
"value": "0 / 0"
}
]
},
{
"value": "0 / 0"
},
{
"value": "-1 / -1"
},
{
"value": "+1 / 1"
}
]
},
{
"value": "0 / ?", // "0 / 4",
//"mark": true,
"children":
[
{
"value": "-1 / ?", // "-1 / 4",
//"mark": true,
"children":
[
{
"value": "+5 / 5",
//"mark": true
},
{
"value": "0 / 0"
},
]
},
{
"value": "+1 / 1"
}
]
},
]
};
// Render tree 1 into #tree1.
var width = 600;
var height = 700;
var svg = d3.select("#tree1").append("svg")
.attr("display", "block")
.attr("margin", "auto")
.attr("width", width)
.attr("height", height);
var treeHdl = make_tree(svg, tree1);
var step_speed = 2000;
// Animation: highlight a node's children, highlight the node, then write
// in its back-propagated Q value — repeated bottom-up over the tree.
var animate_steps = [
{
action: function() {
treeHdl.highlight(treeHdl.root.children[0].children[0].children[0]);
treeHdl.highlight(treeHdl.root.children[0].children[0].children[1]);
treeHdl.highlight(treeHdl.root.children[0].children[0].children[2]);
treeHdl.highlight(treeHdl.root.children[0].children[0].children[3]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl.highlight(treeHdl.root.children[0].children[0]);
},
timeOut: step_speed
},
{
action: function() {
// remove prior..
treeHdl.highlight(treeHdl.root.children[0].children[0].children[0]);
treeHdl.highlight(treeHdl.root.children[0].children[0].children[1]);
treeHdl.highlight(treeHdl.root.children[0].children[0].children[2]);
treeHdl.highlight(treeHdl.root.children[0].children[0].children[3]);
treeHdl.highlight(treeHdl.root.children[0].children[0]);
treeHdl.change_text(treeHdl.root.children[0].children[0], "+1 / 2");
},
timeOut: step_speed
},
// right extreme update
{
action: function() {
// highlight next
treeHdl.highlight(treeHdl.root.children[1].children[0].children[0]);
treeHdl.highlight(treeHdl.root.children[1].children[0].children[1]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl.highlight(treeHdl.root.children[1].children[0]);
},
timeOut: step_speed
},
{
action: function() {
// remove prior
treeHdl.highlight(treeHdl.root.children[1].children[0].children[0]);
treeHdl.highlight(treeHdl.root.children[1].children[0].children[1]);
treeHdl.highlight(treeHdl.root.children[1].children[0]);
treeHdl.change_text(treeHdl.root.children[1].children[0], "-1 / 4");
},
timeOut: step_speed
},
// left mid update
{
action: function() {
treeHdl.highlight(treeHdl.root.children[0].children[0]);
treeHdl.highlight(treeHdl.root.children[0].children[1]);
treeHdl.highlight(treeHdl.root.children[0].children[2]);
treeHdl.highlight(treeHdl.root.children[0].children[3]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl.highlight(treeHdl.root.children[0]);
},
timeOut: step_speed
},
{
action: function() {
// remove prior..
treeHdl.highlight(treeHdl.root.children[0].children[0]);
treeHdl.highlight(treeHdl.root.children[0].children[1]);
treeHdl.highlight(treeHdl.root.children[0].children[2]);
treeHdl.highlight(treeHdl.root.children[0].children[3]);
treeHdl.highlight(treeHdl.root.children[0]);
treeHdl.change_text(treeHdl.root.children[0], "+1 / 3");
},
timeOut: step_speed
},
// right mid update
{
action: function() {
treeHdl.highlight(treeHdl.root.children[1].children[0]);
treeHdl.highlight(treeHdl.root.children[1].children[1]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl.highlight(treeHdl.root.children[1]);
},
timeOut: step_speed
},
{
action: function() {
// remove prior..
treeHdl.highlight(treeHdl.root.children[1].children[0]);
treeHdl.highlight(treeHdl.root.children[1].children[1]);
treeHdl.highlight(treeHdl.root.children[1]);
treeHdl.change_text(treeHdl.root.children[1], "0 / 4");
},
timeOut: step_speed
},
// root update
{
action: function() {
treeHdl.highlight(treeHdl.root.children[0]);
treeHdl.highlight(treeHdl.root.children[1]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl.highlight(treeHdl.root);
},
timeOut: step_speed
},
{
action: function() {
// remove prior..
treeHdl.highlight(treeHdl.root.children[0]);
treeHdl.highlight(treeHdl.root.children[1]);
treeHdl.highlight(treeHdl.root);
treeHdl.change_text(treeHdl.root, "0 / 4");
},
timeOut: step_speed
},
{
action: function() {
// highlight best
treeHdl.highlight(treeHdl.root);
treeHdl.highlight(treeHdl.root.children[1]);
treeHdl.highlight(treeHdl.root.children[1].children[0]);
treeHdl.highlight(treeHdl.root.children[1].children[0].children[0]);
},
timeOut: step_speed
},
];
install_animate(svg, animate_steps);
</script> But there is of course a problem: Pay very close attention to the formulas.. <center><pre class="render:latex">Q_{new}=(1-\alpha)Q_{current}+\alpha Q_{update}</pre></center>Note that: <ul><li>The formula is iterative<br />
<li>The is top down<br />
</ul>Also: <center><pre class="render:latex">Q_{update}=r_t+\gamma\max_{a}Q(s_{t+1}, a)</pre></center>Note carefully the effect and scope of the “max” <ul><li>This is the *local* best not the *global*<br />
<li>It is a heuristic know in computer science as Greedy Optimization." },<br />
</ul>These are the key flaws in this algorithm. So what really happens is: <div id="tree2"></div><script>
// Tree 2 data: what greedy Q-learning actually explores. "_children"
// (instead of "children") starts a branch collapsed; the animation expands
// branches as the greedy policy reaches them, so the +5 reward hidden
// behind the -1 state is never found.
var tree2 = {
"value": "0 / 0", // "0 / 3",
//"mark": true,
"_children":
[
{
"value": "+1 / 1", // "+1 / 3",
//"mark": true,
"_children":
[
{
"value": "+1 / 1", // "+1 / 2",
//"mark": true,
"_children":
[
{
"value": "-1 / -1"
},
{
"value": "0 / 0" // "0 / 0"
},
{
//"mark": true,
"value": "+1 / 1" // "+1 / 1"
},
{
"value": "0 / 0" // "0 / 0"
}
]
},
{
"value": "0 / 0" // "0 / 0"
},
{
"value": "-1 / -1"
},
{
"value": "+1 / 1" // "+1 / 1"
}
]
},
{
"value": "0 / 0",
"_children":
[
{
"value": "-1 / -1", // "-1 / ?",
"children":
[
{
"value": "+5 / 5"
},
{
"value": "0 / 0"
},
]
},
{
"value": "+1 / +1" // "+1 / 1"
}
]
},
]
};
// Render tree 2 into #tree2.
var width = 600;
var height = 700;
var svg2 = d3.select("#tree2").append("svg")
.attr("display", "block")
.attr("margin", "auto")
.attr("width", width)
.attr("height", height);
var treeHdl2 = make_tree(svg2, tree2);
var step_speed = 2000;
// Animation: greedy exploration — expand, follow the locally best child,
// and propagate its Q value back toward the root.
var animate_steps2 = [
{
action: function() {
treeHdl2.highlight(treeHdl2.root);
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.show_hide(treeHdl2.root);
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.highlight(treeHdl2.root.children[0]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.change_text(treeHdl2.root, "0 / 1");
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.show_hide(treeHdl2.root.children[0]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.highlight(treeHdl2.root.children[0].children[0]);
treeHdl2.highlight(treeHdl2.root.children[0].children[3]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.change_text(treeHdl2.root.children[0], "+1 / 2");
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.change_text(treeHdl2.root, "0 / 2");
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.highlight(treeHdl2.root.children[0].children[3]);
treeHdl2.show_hide(treeHdl2.root.children[0].children[0]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.highlight(treeHdl2.root.children[0].children[0].children[2]);
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.change_text(treeHdl2.root.children[0].children[0], "+1 / 2");
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.change_text(treeHdl2.root.children[0], "+1 / 3");
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.change_text(treeHdl2.root, "0 / 3");
},
timeOut: step_speed
},
{
action: function() {
treeHdl2.show_hide(treeHdl2.root.children[1]);
},
timeOut: step_speed
},
];
install_animate(svg2, animate_steps2);
</script>
<br />
Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-7146906684319503372017-03-18T14:19:00.001+09:002017-03-18T15:45:04.584+09:00Setting up basic Machine Learning rig - UbuntuFor ubuntu<br />
<a href="https://www.tensorflow.org/install/install_linux">https://www.tensorflow.org/install/install_linux</a><br />
<br />
First install python and pip <br />
<br />
<pre class="brush:shell">sudo apt-get install python python-pip python-dev
sudo pip install --upgrade pip virtualenv
</pre><br />
--- WITH A GPU ---<br />
<br />
Install the GPU if not already done as described here:<br />
<a href="https://www.pugetsystems.com/labs/hpc/NVIDIA-CUDA-with-Ubuntu-16-04-beta-on-a-laptop-if-you-just-cannot-wait-775/">https://www.pugetsystems.com/labs/hpc/NVIDIA-CUDA-with-Ubuntu-16-04-beta-on-a-laptop-if-you-just-cannot-wait-775/</a><br />
<br />
Verify that you even have a usable gpu (it must be one compatible with cuda)
<pre class="brush:shell">lspci | grep -i nvidia
</pre><br />
Install Cuda drivers<br />
<br />
Remove prior installs (if you have a problem with it)<br />
<pre class="brush:shell">sudo apt-get purge nvidia-cuda*
sudo apt-get install cuda
</pre><br />
download the recent cuda drivers from<br />
<a href="https://developer.nvidia.com/cuda-downloads">https://developer.nvidia.com/cuda-downloads</a><br />
<br />
install the drivers <br />
<pre class="brush:shell">chmod 755 cuda_7.5.18_linux.run
sudo ./cuda_7.5.18_linux.run --override
</pre><br />
Confirm setup<br />
<pre class="brush:shell">which nvcc
nvcc --version
nvidia-smi
</pre><br />
Output should be something like<br />
<pre>nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2015 NVIDIA Corporation
Built on Tue_Aug_11_14:27:32_CDT_2015
Cuda compilation tools, release 7.5, V7.5.17
Sat Mar 18 14:16:58 2017
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 367.57 Driver Version: 367.57 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 GeForce GTX 970M Off | 0000:01:00.0 Off | N/A |
| N/A 55C P0 22W / N/A | 586MiB / 3016MiB | 8% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 1144 G /usr/lib/xorg/Xorg 366MiB |
| 0 1922 G compiz 111MiB |
| 0 2302 G ...bled/ExtensionDeveloperModeWarning/Defaul 107MiB |
+-----------------------------------------------------------------------------+
</pre><br />
<br />
Install cudnn drivers<br />
<a href="http://askubuntu.com/questions/767269/how-can-i-install-cudnn-on-ubuntu-16-04">http://askubuntu.com/questions/767269/how-can-i-install-cudnn-on-ubuntu-16-04</a><br />
<br />
Download the drivers<br />
<a href="https://developer.nvidia.com/cudnn">https://developer.nvidia.com/cudnn<br />
</a><br />
<br />
Locate where your cuda installation is. It is under /usr/lib/... and /usr/include or /usr/local/cuda/. <br />
<br />
<pre class="brush:shell">which nvcc
ldconfig -p | grep cuda
</pre><br />
Step 3: Copy the files:<br />
<pre class="brush:shell">cd extracted_driver/
sudo cp -P include/cudnn.h /usr/include
sudo cp -P lib64/libcudnn* /usr/lib/x86_64-linux-gnu/
sudo chmod a+r /usr/lib/x86_64-linux-gnu/libcudnn*
</pre><br />
Confirm setup<br />
<pre class="brush:shell">ldconfig -p | grep cudnn
</pre><br />
should be something like:<br />
<pre>libcudnn.so.5 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libcudnn.so.5
libcudnn.so (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libcudnn.so
</pre><br />
--- INSTALL KEY TOOLS ---<br />
<br />
Install Machine learning essentials<br />
<br />
<pre class="brush:shell">sudo pip install numpy
sudo pip install pandas
sudo pip install scikit-learn
sudo pip install jupyter
sudo pip install xgboost
</pre><br />
<br />
Now you can install tensorflow with GPU as follows<br />
<pre class="brush:shell">sudo pip install tensorflow-gpu
sudo pip install keras
</pre><br />
Or without:<br />
<pre class="brush:shell">sudo pip install tensorflow
sudo pip install keras
</pre><br />
Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-83504249814416216942016-10-26T23:21:00.002+09:002016-10-27T21:08:00.299+09:00analytic regression in python using a parameterized family of functionsIm have been studying ML(machine learning). So here is the deal.. ML is just applied statics. You take a bunch of data and you fit a model to it. The data affords you a certain upper limit of accuracy compared to the real world. Your chosen model then subtracts a certain percentage off the top. The cost of abstraction is an imperfect fit. Simple.<br />
<br />
So one of the primary rules of thumb when working with this stuff is start simple. You need to digest the dataset and then make incremental steps forward... its easy however to be making steps backwards without knowing it.<br />
<br />
Now regression is just curve fitting. The simplest way to do this is just a direct analytic solution. no gradient descent, nothing more complex, but this approach has limits that quickly become clear when dealing with larger datasets.<br />
<br />
So let's solve analytic regression for a parameterized family of functions and then codify it... you'll note i have some quirks to the way i do things.. i try to avoid matrix maths since i think it hides too much and fails too readily when you get beyond 2d axis of data<br />
<br />
Define the model:<br />
<pre class="render:latex">\hat y_i := \sum_k w_k f(x_i;k)</pre><br />
Define the loss:<br />
<pre class="render:latex">MSE = \sum_i(\hat y_i - y_i)^2</pre><br />
Set target (when mse gradient is 0 the error is at its min for w of 0 to n):<br />
<pre class="render:latex">\frac{\partial MSE}{\partial w} = 0</pre><br />
Sub the model into the target equation for each "n" derivative term:<br />
<pre class="render:latex">= \frac{\partial}{\partial w_n} \Big( \sum_i( \sum_k w_k f(x_i;k) - y_i)^2 \Big)</pre><br />
Expand square (note not expanding the sum of k treat it as a single term):<br />
<pre class="render:latex">= \frac{\partial}{\partial w_n} \Big( \sum_i\big( (\sum_k w_k f(x_i;k))^2 -2 y_i \sum_k w_k f(x_i;k) + y_i^2 \big) \Big)</pre><br />
Transfer outer sum to terms: <br />
<pre class="render:latex">= \frac{\partial}{\partial w_n} \Big( \sum_i(\sum_k w_k f(x_i;k))^2 - \sum_i 2 y_i \sum_k w_k f(x_i;k) + \sum_iy_i^2 \Big)</pre><br />
Transfer derivative to terms and expand/rebase squares:<br />
<pre class="render:latex">= \frac{\partial}{\partial w_n} \sum_i(\sum_k w_k f(x_i;k))(\sum_j w_j f(x_i;j)) - \frac{\partial}{\partial w_n} \sum_i \sum_k 2 y_i w_k f(x_i;k) + \frac{\partial}{\partial w_n} \sum_iy_i^2</pre><br />
Do simple derivatives to the 2 right most terms: <br />
* Note that the partial derivative of a sum drops all terms unless n=k<br />
* Note that the sum of y's looks like a constant and the derivative of a constant is 0 <br />
<pre class="render:latex">= \sum_i\frac{\partial}{\partial w_n} (\sum_k w_k f(x_i;k))(\sum_j w_j f(x_i;j)) - \sum_i 2 y_i f(x_i;n)</pre><br />
Start applying the derivative product rule:<br />
<pre class="render:latex">= \sum_i\sum_k w_k f(x_i;k) \frac{\partial}{\partial w_n}(\sum_j w_j f(x_j;j)) + \sum_i\sum_j w_j f(x_j;j) \frac{\partial}{\partial w_n}(\sum_k w_k f(x_i;k)) - \sum_i 2 y_i f(x_i;n)</pre><br />
Continue the derivative product rules:<br />
<pre class="render:latex">= \sum_i\sum_k w_k f(x_i;k) f(x_i;n) + \sum_i\sum_j w_j f(x_i;j) f(x_i;n)) - \sum_i 2 y_i f(x_i;n)</pre><br />
Rebase vars into common ones:<br />
<pre class="render:latex">= \sum_i\sum_j w_j f(x_i;j) f(x_i;n) + \sum_i\sum_j w_j f(x_i;j) f(x_i;n)) - \sum_i 2 y_i f(x_i;n)</pre><br />
Simplify:<br />
<pre class="render:latex">= 2 \sum_i\sum_j w_j f(x_i;j) f(x_i;n) - 2 \sum_i y_i f(x_i;n)</pre><br />
Equate for a linear solver implementation (recall there are now "n" of these equations):<br />
<pre class="render:latex">\sum_i\sum_j w_j f(x_i;j) f(x_i;n) = \sum_i y_i f(x_i;n)</pre><br />
Now the above maths when codified produces the following code:<br />
<br />
<pre class="brush:python">#!/usr/bin/env python
# analytic regression for a general linear combination of functions
import random
import numpy as np
import matplotlib.pyplot as plt
# convert the maths into a function
# note this is likely weak in respect to:
# - overflow (because we are summing numbers)
# - large amounts of data (no batching etc)
# etc
def analytic_regress(x, y, params, func):
    """Analytic least-squares fit of y ~ sum_k w_k * func(x; params[k]).

    Solves the normal equations derived in the post directly with a
    linear solver -- no gradient descent.

    x, y   : numpy arrays of sample points
    params : sequence of basis-function parameters, one per weight
    func   : basis function, func(x, param) -> array

    Returns (w, model): the fitted weight vector and a callable that
    evaluates the fitted curve at a scalar x.
    """
    size = len(params)
    # Evaluate each basis function over x once and reuse it; the original
    # re-evaluated func() inside the double loop.
    basis = [func(x, p) for p in params]
    left = np.zeros((size, size))
    right = np.zeros(size)
    for i in range(size):
        right[i] = np.sum(y * basis[i])
        # The Gram matrix is symmetric, so fill both halves together.
        for j in range(i, size):
            left[i, j] = np.sum(basis[i] * basis[j])
            left[j, i] = left[i, j]
    w = np.linalg.solve(left, right)
    model = lambda x: (np.sum(w * np.array([func(x, i) for i in params])))
    return w, model
### testing ###
def generate(func,
             x_min, x_max,
             yerr,
             count):
    """Sample `count` noisy points from y = func(x).

    x is drawn uniformly from [x_min, x_max); uniform noise in [0, yerr)
    is added to each y.  Returns the (x, y) arrays.
    """
    x = np.random.uniform(x_min, x_max, count)
    y = np.array([func(xi) for xi in x]) + np.random.uniform(0, yerr, count)
    return x, y
# basis functions to trial
def ploy(x, n):
    """Polynomial basis term x**n (name kept as in the post; sic 'poly')."""
    return x ** n
def cossin(x, n):
    """Fourier basis: cos(n*x) for n >= 0, sin(|n|*x) for n < 0."""
    if n < 0:
        return np.sin(-n * x)
    return np.cos(n * x)
def sigmoid(x, n):
    """Logistic step basis, centred at x = n (value 0.5 at x == n)."""
    return 1.0 / (1.0 + np.exp(n - x))
def guassian(x, n):
    """Unit-height Gaussian bump centred at n (name kept; sic 'gaussian')."""
    return np.exp(-0.5 * (x - n) ** 2)
def square(x, n, w):
    """Rectangular bump basis: 1.0 on [n, n+w), else 0.0 (elementwise)."""
    return (1.0 * np.logical_and(x >= n, x < n + w))
# a general function to trial a few regressions
def test_set(func,
             x_min, x_max, y_error, count,
             basis, params,
             x_test):
    """Fit several models of increasing basis size to one noisy data set
    and plot them all together.

    func      : ground-truth function to sample from
    basis     : basis function passed through to analytic_regress
    params    : list of basis sizes to trial (each p uses range(p))
    x_test    : x values at which to evaluate the fitted curves
    """
    x, y = generate(func, x_min, x_max, y_error, count)
    models = {}
    for p in params:
        w, models[p] = analytic_regress(x, y, range(p), basis)
        # py2-only print statements converted to the (output-identical)
        # print() form for Python 3 compatibility
        print("model basis up to:", p, " has w:")
        print(w)
    y_test = {}
    for p in params:
        model = models[p]
        y_test[p] = np.array([model(i) for i in x_test])
    plt.plot(x, y, "o")
    for p in params:
        plt.plot(x_test, y_test[p])
    plt.show()
# a general funtion to trial a single regression
def test_one(func,
             x_min, x_max, y_error, count,
             basis, params,
             x_test):
    """Fit a single model to one noisy data set and plot data vs fit.

    Same arguments as test_set, but `params` is passed straight through
    to analytic_regress (one weight per entry).
    """
    x, y = generate(func, x_min, x_max, y_error, count)
    w, model = analytic_regress(x, y, params, basis)
    # py2 print statement converted to py3-compatible call
    print(w)
    y_test = np.array([model(i) for i in x_test])
    plt.plot(x, y, "o")
    plt.plot(x_test, y_test)
    plt.show()
# Trial runs: each fits one of the basis families above to a sampled
# target function and pops up the comparison plot.
# (py2 print statements converted to py3-compatible calls; typos in two
# messages fixed: "othewise" -> "otherwise", "sigmod" -> "sigmoid".)
print(" ploy regress for function y = 78 - 2x")
test_set(lambda x: (78.0 - 2.0*x),
         0.0, 6.0, 6, 100,
         ploy, [2,3,5,9,11],
         np.array(range(-1, 70))/10.0)
print(" ploy regress for function y = 78 + 60x - 13x^2")
test_set(lambda x: (78.0 + 60.0*x - 13.0*x**2),
         0.0, 6.0, 6, 100,
         ploy, [2,3,5,9,11],
         np.array(range(-2, 72))/10.0)
print("square regress for function y = 1 iff 1 < x < 6 otherwise 0")
test_one(lambda x: (0 if (x < 1 or x > 6) else 1),
         0.0, 7.0, 0.4, 100,
         lambda x,n: (square(x,n,0.5)), np.array(range(14))/2.0,
         np.array(range(-50, 150))/10.0)
print("square regress for function y = 10*cos(2x)")
test_one(lambda x: 10*np.cos(2.0*x),
         0.0, 7.0, 0.4, 100,
         lambda x,n: (square(x,n,0.5)), np.array(range(14))/2.0,
         np.array(range(-50, 150))/10.0)
print("sigmoid regress for function y = 10*cos(2x)")
test_one(lambda x: 10*np.cos(2.0*x),
         0.0, 7.0, 0.4, 100,
         sigmoid, np.array(range(14))/2.0,
         np.array(range(-50, 150))/10.0)
print("guassian regress for function y = 10*cos(2x)")
test_one(lambda x: 10*np.cos(2.0*x),
         0.0, 7.0, 0.4, 100,
         guassian, np.array(range(14))/2.0,
         np.array(range(-50, 150))/10.0)
print("cos/sin regress for function y = 10*cos(2x)")
test_one(lambda x: 10*np.cos(2.0*x),
         0.0, 7.0, 0.4, 100,
         cossin, np.array(range(-6,8))/2.0,
         np.array(range(-50, 150))/10.0)
</pre><br />
And then the various outputs...<br />
<br />
The regression using a family of polynomials on a linear equation:<br />
<div class="separator" style="clear: both; text-align: center;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEh72EjRCHIfsiZP-gldgfMiLRnj9Qcv4lDeIBzmy6e8NHlfvrFsxgyc0nIkzBsN-i38-17VFRRl2zXsKAK7-nM0aDVcEHLZpTjNnW65ohdMy2Z1e6tXUGRf_iXU11xF_w9SW7MvvOsxdvw/s1600/liner_regress.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEh72EjRCHIfsiZP-gldgfMiLRnj9Qcv4lDeIBzmy6e8NHlfvrFsxgyc0nIkzBsN-i38-17VFRRl2zXsKAK7-nM0aDVcEHLZpTjNnW65ohdMy2Z1e6tXUGRf_iXU11xF_w9SW7MvvOsxdvw/s320/liner_regress.png" width="320" height="240" /></a></div><br />
The regression using a family of polynomials on a quadratic:<br />
<div class="separator" style="clear: both; text-align: center;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiDKZ9y-vCB7WACNdpgHRSiaUY09euTHTtw5UtrAW0XoWyjvr25wHQXQEdVbDb-2Ntw5N8X1fAmgTX7W8nDsYR-XO_z7tnWNiK716rwR3riG-xOmtTIjX5n1zgHQqjp178eyI2LgVfwHJY/s1600/quat_with_regress.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiDKZ9y-vCB7WACNdpgHRSiaUY09euTHTtw5UtrAW0XoWyjvr25wHQXQEdVbDb-2Ntw5N8X1fAmgTX7W8nDsYR-XO_z7tnWNiK716rwR3riG-xOmtTIjX5n1zgHQqjp178eyI2LgVfwHJY/s320/quat_with_regress.png" width="320" height="240" /></a></div><br />
The regression using square bars on a square bar:<br />
<div class="separator" style="clear: both; text-align: center;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgKyZaej8rd8fuqU97s4D6N2Ix5iadn45pWBW1H8AQt7KuGkz2W8MfU0ZXFhBP2wa44P77-wusswEGYOVJmu9mV5TX3GLB6Y4-lBANLOZ8zo1w3dvLTWZ0nufskHNNhrAnHn1MlGmVF6Ys/s1600/sqr_regress.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgKyZaej8rd8fuqU97s4D6N2Ix5iadn45pWBW1H8AQt7KuGkz2W8MfU0ZXFhBP2wa44P77-wusswEGYOVJmu9mV5TX3GLB6Y4-lBANLOZ8zo1w3dvLTWZ0nufskHNNhrAnHn1MlGmVF6Ys/s320/sqr_regress.png" width="320" height="240" /></a></div><br />
The regression using square bars on a cos:<br />
<div class="separator" style="clear: both; text-align: center;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgkOrbBQuElVpl2JvqzRd6qviYAS4lDY9ipbUNDTl_IqiB9U4C4nUHdBKy5xRchf57wsR5jULJ94ldmhWxKCaBECknHeN91v98YxDDDB2z411af4wh7eKm52fS3XJagoQ6RUu-N9451Sgw/s1600/sine_sqr_regress.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgkOrbBQuElVpl2JvqzRd6qviYAS4lDY9ipbUNDTl_IqiB9U4C4nUHdBKy5xRchf57wsR5jULJ94ldmhWxKCaBECknHeN91v98YxDDDB2z411af4wh7eKm52fS3XJagoQ6RUu-N9451Sgw/s320/sine_sqr_regress.png" width="320" height="240" /></a></div><br />
<br />
The regression using guassians on a cos:<br />
<div class="separator" style="clear: both; text-align: center;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgVvTlJQTZ8j-sGKbhJxPePvZHs4fQFgVlnNQojkaz3uxBjphBjtokBjcpdUnsLmQ9QUw8SnKHU94xI9ROQQzFMnyXqk9MUEzSMnxYqCfzgDAb0dL-ZEULf_tugUaKGGUBSJuMHTekfcMk/s1600/guassian_fit_sine.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgVvTlJQTZ8j-sGKbhJxPePvZHs4fQFgVlnNQojkaz3uxBjphBjtokBjcpdUnsLmQ9QUw8SnKHU94xI9ROQQzFMnyXqk9MUEzSMnxYqCfzgDAb0dL-ZEULf_tugUaKGGUBSJuMHTekfcMk/s320/guassian_fit_sine.png" width="320" height="240" /></a></div><br />
The regression using sigmoids on a cos:<br />
<div class="separator" style="clear: both; text-align: center;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEi-b_EjtSD4T9eyVEzT51683M7hKEfW4oM9ybouX9r1K0ey5x53Q3lSMtaXQk2Mj73iHIZD-l0ECtH_2XQY5GKBuWc5kZq3smfOhHHRV72r3V5VskJqQnioxCCsPzHvmHq8SrRMwVqnU-E/s1600/sigmod_fiting_cos.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEi-b_EjtSD4T9eyVEzT51683M7hKEfW4oM9ybouX9r1K0ey5x53Q3lSMtaXQk2Mj73iHIZD-l0ECtH_2XQY5GKBuWc5kZq3smfOhHHRV72r3V5VskJqQnioxCCsPzHvmHq8SrRMwVqnU-E/s320/sigmod_fiting_cos.png" width="320" height="240" /></a></div><br />
The regression using sin and cos on a cos:<br />
<div class="separator" style="clear: both; text-align: center;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjbI_0nAj2WPJGuC4Gul8aHNmC2HRr4iswJRqvaIXsK7bY90xsXec5tomMhgClcfWd-Sy4CLpgqX0poQaHZkcrwG9ONJ16y_jbbBw9Ry2x871WC5EdFAMbHINxVgN8nSXXuDvDeXUmtzkw/s1600/cossin_fit_cos.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjbI_0nAj2WPJGuC4Gul8aHNmC2HRr4iswJRqvaIXsK7bY90xsXec5tomMhgClcfWd-Sy4CLpgqX0poQaHZkcrwG9ONJ16y_jbbBw9Ry2x871WC5EdFAMbHINxVgN8nSXXuDvDeXUmtzkw/s320/cossin_fit_cos.png" width="320" height="240" /></a></div>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-51739434672942579332016-10-26T21:28:00.004+09:002016-10-26T21:41:29.920+09:00Find the n shortest paths from a to b <pre class="brush:cpp">// compile with: g++ --std=c++11 multipath.cpp
//
// Problem Statement:
// find the n shortest paths from a to b
// Answer:
// The proposer of the question seemed to think this was hard
// and even stated many more limitations to the question(no cycles etc)..
// but I took it as dead easy. Since this isn't a challenge of efficiency
// I'll stop at the first-pass naive recursive answer..
#include <stdint.h>

#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include <utility>
#include <vector>
// A graph vertex: an identifier plus its weighted outgoing edges.
struct Node
{
    uint32_t id_;                                   // vertex identifier
    std::vector<std::pair<Node*,uint32_t> > kids_;  // (successor, edge weight)

    // Build a vertex with the given id and no outgoing edges yet.
    Node(uint32_t id) :
        id_(id)
    {}
};
struct Graph
{
// meh should be better..
std::map<uint32_t, std::shared_ptr<Node> > nodes_;
Node* get(uint32_t id)
{
Node* n = nodes_[id].get();
if (n == NULL)
{
nodes_[id] = std::make_shared<Node>(id);
n = nodes_[id].get();
}
return n;
}
void link(uint32_t fromId,
uint32_t toId,
uint32_t weight)
{
Node* from = nodes_[fromId].get();
if (from == NULL)
{
nodes_[fromId] = std::make_shared<Node>(fromId);
from = nodes_[fromId].get();
}
Node* to = nodes_[toId].get();
if (to == NULL)
{
nodes_[toId] = std::make_shared<Node>(toId);
to = nodes_[toId].get();
}
from->kids_.emplace_back(std::make_pair(to,weight));
}
};
// One step of a path through the graph.  Steps form a singly linked
// chain from the node just reached back to the search's start node,
// so a shared tail can serve many candidate paths at once.
struct PathNode
{
    std::shared_ptr<PathNode> parent_;  // previous step, null at the start
    Node* node_;                        // graph node reached by this step
    uint32_t cost_;                     // accumulated cost from the start

    PathNode(std::shared_ptr<PathNode> parent,
             Node* node,
             uint32_t cost) :
        parent_(parent),
        node_(node),
        cost_(cost)
    {}
};
// Stream a path as "id-id-...-id", start node first.
// The chain is stored child -> parent, so recurse on the parent
// before printing this step's node (output order is inverted
// relative to storage, which is exactly what we want).
std::ostream& operator<<(std::ostream& os,
                         const PathNode& path)
{
    if (path.parent_.get() != NULL)
        os << *(path.parent_) << "-";

    if (path.node_ != NULL)
        os << path.node_->id_;

    return os;
}
// ************ NAIVE APPROACH ***********
// WARNING do be careful here.. there are some large weaknesses
// in the Naive version:
// - negative weights will break the shortest first ordering
// - cycles that trap the search in disconnected areas will never terminate
//
// A stronger algo would fix this by checking reachability from the node and
// pruning the impossible branches from the tree (in fact that would be my next
// step in optimization as it also saves cycles.. but im here for a different
// target so not going there.. a more simple fix would be to add a cycle
// limit and terminate on that..)
// Find up to `limit` cheapest paths from `from` to `to`, cheapest first.
// Best-first search over partial paths kept in a min-heap on total cost.
// Returns fewer than `limit` paths if the search space is exhausted.
// (See the NAIVE APPROACH warning above: negative weights break the
// ordering, and unreachable targets inside a cycle never terminate.)
std::vector<std::shared_ptr<PathNode> > findPaths(Node* from,
                                                  Node* to,
                                                  uint32_t limit)
{
    std::vector<std::shared_ptr<PathNode> > results;

    // std::*_heap build max-heaps, so invert the comparison to pop
    // the *cheapest* partial path first.
    auto comp = [] (const std::shared_ptr<PathNode>& a,
                    const std::shared_ptr<PathNode>& b) -> bool
                { return a->cost_ > b->cost_; };

    // I was using a std::priority_queue.. problem is i
    // cant print it to demonstrate the stack action to
    // the question proposer
    std::vector<std::shared_ptr<PathNode> > stack;

    // Seed the search with a zero-cost path holding only *from*.
    stack.push_back(std::make_shared<PathNode>(nullptr,
                                               from,
                                               0));
    while (results.size() < limit && !stack.empty())
    {
        // show the stack...
        // std::cout << " stack: \n";
        // for (const std::shared_ptr<PathNode>& path : stack)
        //     std::cout << " -- c:" << path->cost_ << " p:" << *path << "\n";

        // Pop the cheapest partial path.
        std::pop_heap(stack.begin(), stack.end(), comp);
        std::shared_ptr<PathNode> current = stack.back();
        stack.pop_back();

        // check if node was at the terminal
        if (current->node_ == to)
        {
            // register it as the next result
            results.push_back(current);
            // BUGFIX: this was "> limit", which could never be true because
            // results grows one element at a time and the loop condition
            // caps it at limit -- so the final iteration always fell through
            // and needlessly expanded one more node.  ">=" exits at once.
            if (results.size() >= limit)
                return results;
        }
        // and then expand search tree using the current node
        for (const std::pair<Node*,uint32_t>& edge : current->node_->kids_)
        {
            std::shared_ptr<PathNode> step =
                std::make_shared<PathNode>(current,
                                           edge.first,
                                           current->cost_ + edge.second);
            // now here is a tricky part.. the next shortest route may
            // include the current one even if it is a *terminal* (ie self loop)
            // hence we *always* push even if its the *end*!
            stack.push_back(step);
            std::push_heap(stack.begin(), stack.end(), comp);
        }
    }
    // Heap exhausted before reaching "limit" results; return what we found.
    return results;
}
// Dump each found path, one per line, prefixed with its total cost.
void printPaths(std::ostream& os,
                const std::vector<std::shared_ptr<PathNode> >& paths)
{
    os << "Results...\n";
    for (const auto& path : paths)
        os << "d:" << path->cost_ << " p:" << *path << "\n";
}
void test()
{
{
// warning naive algo death - dont do this with a cycle it will never terminate
Graph disconnected;
Node* to = disconnected.get(0);
Node* from = disconnected.get(1);
printPaths(std::cout, findPaths(from,to,3));
}
{
// self
Graph self;
self.link(0,0,1);
Node* zero = self.get(0);
printPaths(std::cout, findPaths(zero,zero,3));
}
{
// cycle
Graph cycle;
cycle.link(0,1,1);
cycle.link(1,0,1);
Node* from = cycle.get(0);
Node* to = cycle.get(1);
printPaths(std::cout, findPaths(from,to,3));
}
{
// a line of nodes ( asking for 3 anwers.. impossible)
Graph oneLine;
oneLine.link(0,1,1);
oneLine.link(1,2,1);
oneLine.link(2,3,1);
Node* from = oneLine.get(0);
Node* to = oneLine.get(3);
printPaths(std::cout, findPaths(from,to,3));
}
{
// 2 lines of nodes ( asking for 3 anwers.. impossible)
Graph twoLine;
twoLine.link(0,1,1);
twoLine.link(1,2,1);
twoLine.link(2,3,1);
twoLine.link(0,4,2);
twoLine.link(4,3,2);
Node* from = twoLine.get(0);
Node* to = twoLine.get(3);
printPaths(std::cout, findPaths(from,to,3));
}
{
// the questioners challenge graph
Graph triangle;
triangle.link(1,2,1);
triangle.link(1,3,2);
triangle.link(2,1,1);
triangle.link(2,3,3);
triangle.link(2,4,2);
triangle.link(2,5,1);
triangle.link(3,1,2);
triangle.link(3,2,3);
triangle.link(3,5,2);
triangle.link(3,6,1);
triangle.link(4,2,2);
triangle.link(4,5,3);
triangle.link(5,2,1);
triangle.link(5,3,2);
triangle.link(5,4,3);
triangle.link(5,6,3);
triangle.link(6,5,3);
triangle.link(6,3,1);
Node* from = triangle.get(1);
Node* to = triangle.get(6);
printPaths(std::cout, findPaths(from,to,10));
}
}
// Entry point: run every demo scenario.
int main()
{
    test();
    return 0;
}
</pre><br />
<br />
<br />
And output will look like this:<br />
<pre>Results...
Results...
d:0 p:0
d:1 p:0-0
d:2 p:0-0-0
Results...
d:1 p:0-1
d:3 p:0-1-0-1
d:5 p:0-1-0-1-0-1
Results...
d:3 p:0-1-2-3
Results...
d:3 p:0-1-2-3
d:4 p:0-4-3
Results...
d:3 p:1-3-6
d:5 p:1-2-3-6
d:5 p:1-2-1-3-6
d:5 p:1-2-5-6
d:5 p:1-3-6-3-6
d:5 p:1-2-5-3-6
d:7 p:1-2-1-2-5-6
d:7 p:1-2-1-2-1-3-6
d:7 p:1-2-3-6-3-6
d:7 p:1-2-5-2-1-3-6
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-42145104747533489322016-10-05T22:18:00.001+09:002016-10-05T22:29:29.757+09:00Adding math rendering via katex to bloggerFirst edit the Html version of the blogs template and add the following code before the end of the body<br />
<br />
<pre class="brush:cpp"><!--KATEX RENDER BEGIN -->
<link href="https://khan.github.io/KaTeX/bower_components/katex/dist/katex.min.css" type="text/css" rel="stylesheet"/>
<script src="https://khan.github.io/KaTeX/bower_components/katex/dist/katex.min.js" type="text/javascript"></script>
<script language='javascript'>
//<![CDATA[
// Scan the page for <pre class="render:latex"> blocks and render each
// one's contents with KaTeX into a freshly inserted sibling <div>,
// then hide the raw latex source.
function renderLatex()
{
var locs = document.getElementsByTagName("pre");
for (var x = 0; x < locs.length; x++)
{
// only touch blocks explicitly tagged for latex rendering
if (locs[x].className == "render:latex")
{
var eq = locs[x].innerHTML;
// rendered output lands in a new div placed right after the source block
var div=document.createElement("div");
div.className = "latexRendered";
locs[x].parentNode.insertBefore(div,locs[x].nextSibling);
try
{
katex.render("\\displaystyle{" + eq + "}", div);
// success: re-tag the source so it is not rendered twice, and hide it
locs[x].className = "done:latex";
locs[x].style = "display:none";
}
catch (err)
{
// failure: show the error inline and flag the block for inspection
div.innerHTML = err;
locs[x].className = "error:latex";
}
}
}
}
renderLatex();
//]]>
</script>
<!-- KATEX RENDER END -->
</pre><br />
You then blog using html tags like the following<br />
<br />
<pre class="brush:cpp"><pre class="render:latex">y_i = \sum_iw_ix_i</pre>
</pre><br />
And it should render like this<br />
<br />
<pre class="render:latex">y_i = \sum_iw_ix_i</pre><br />
<br />
Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-20544333415779975872016-09-22T21:23:00.001+09:002016-09-23T22:09:54.395+09:00intel drm driver issue and disk slowness fix in ubuntu / xbuntu 16If you are running xubuntu of ubuntu and have a intel graphics card then the drivers are a bit broken.. This causes general slowness and the apperance of constant disk accesses and it seems to show most often after suspension wakeup. <br />
<br />
You can confirm the issue by tailing dmesg:<br />
<pre class="brush:shell">dmesg -w
</pre><br />
You're watching for this kind of message:<br />
<pre class="brush:shell">[20474.916101] ------------[ cut here ]------------
[20474.916133] WARNING: CPU: 3 PID: 999 at /build/linux-a2WvEb/linux-4.4.0/drivers/gpu/drm/drm_irq.c:1326 drm_wait_one_vblank+0x1b5/0x1c0 [drm]()
[20474.916135] vblank wait timed out on crtc 0
[20474.916136] Modules linked in: uas usb_storage nls_iso8859_1 mmc_block udf crc_itu_t drbg ansi_cprng ctr ccm i2400m_usb i2400m wimax intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp arc4 iwldvm uvcvideo kvm_intel kvm videobuf2_vmalloc videobuf2_memops mac80211 videobuf2_v4l2 videobuf2_core irqbypass v4l2_common videodev crct10dif_pclmul crc32_pclmul iwlwifi aesni_intel media snd_hda_codec_hdmi aes_x86_64 snd_hda_codec_realtek lrw snd_hda_codec_generic gf128mul snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq joydev glue_helper ablk_helper cryptd input_leds serio_raw snd_seq_device toshiba_acpi snd_timer sparse_keymap cfg80211 snd soundcore toshiba_bluetooth wmi toshiba_haps mei_me mei lpc_ich shpchp mac_hid tpm_infineon ip6t_REJECT
[20474.916188] nf_reject_ipv6 nf_log_ipv6 xt_hl ip6t_rt nf_conntrack_ipv6 nf_defrag_ipv6 ipt_REJECT nf_reject_ipv4 nf_log_ipv4 nf_log_common xt_LOG xt_limit xt_tcpudp xt_addrtype nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack ip6table_filter ip6_tables nf_conntrack_netbios_ns nf_conntrack_broadcast nf_nat_ftp nf_nat nf_conntrack_ftp nf_conntrack iptable_filter ip_tables x_tables parport_pc ppdev lp parport autofs4 i915 psmouse i2c_algo_bit drm_kms_helper ahci syscopyarea sysfillrect libahci sysimgblt fb_sys_fops drm sdhci_pci e1000e sdhci ptp pps_core video fjes
[20474.916224] CPU: 3 PID: 999 Comm: Xorg Not tainted 4.4.0-36-generic #55-Ubuntu
[20474.916226] Hardware name: TOSHIBA dynabook R731/36EB/Portable PC, BIOS Version 3.60 01/24/2012
[20474.916228] 0000000000000286 00000000aa071208 ffff8800a46dbb08 ffffffff813f13b3
[20474.916231] ffff8800a46dbb50 ffffffffc00cfb38 ffff8800a46dbb40 ffffffff810810f2
[20474.916234] ffff88014a605000 0000000000000000 0000000000000000 00000000000e0368
[20474.916237] Call Trace:
[20474.916245] [<ffffffff813f13b3>] dump_stack+0x63/0x90
[20474.916251] [<ffffffff810810f2>] warn_slowpath_common+0x82/0xc0
[20474.916254] [<ffffffff8108118c>] warn_slowpath_fmt+0x5c/0x80
[20474.916259] [<ffffffff810c3815>] ? finish_wait+0x55/0x70
[20474.916272] [<ffffffffc009f2d5>] drm_wait_one_vblank+0x1b5/0x1c0 [drm]
[20474.916276] [<ffffffff810c3cb0>] ? wake_atomic_t_function+0x60/0x60
[20474.916311] [<ffffffffc0229d5a>] intel_atomic_commit+0x43a/0x6f0 [i915]
[20474.916329] [<ffffffffc00b7fbf>] ? drm_atomic_set_crtc_for_connector+0x6f/0xe0 [drm]
[20474.916346] [<ffffffffc00b8fc7>] drm_atomic_commit+0x37/0x60 [drm]
[20474.916359] [<ffffffffc0154d86>] drm_atomic_helper_set_config+0x76/0xb0 [drm_kms_helper]
[20474.916374] [<ffffffffc00a7e32>] drm_mode_set_config_internal+0x62/0x100 [drm]
[20474.916389] [<ffffffffc00ac48c>] drm_mode_setcrtc+0x3cc/0x4f0 [drm]
[20474.916402] [<ffffffffc009d742>] drm_ioctl+0x152/0x540 [drm]
[20474.916417] [<ffffffffc00ac0c0>] ? drm_mode_setplane+0x1b0/0x1b0 [drm]
[20474.916421] [<ffffffff81220c1f>] do_vfs_ioctl+0x29f/0x490
[20474.916424] [<ffffffff8120f7f1>] ? __sb_end_write+0x21/0x30
[20474.916428] [<ffffffff8120d3fd>] ? vfs_write+0x15d/0x1a0
[20474.916430] [<ffffffff81220e89>] SyS_ioctl+0x79/0x90
[20474.916434] [<ffffffff8182dfb2>] entry_SYSCALL_64_fastpath+0x16/0x71
[20474.916437] ---[ end trace a5e017619a02b625 ]---
</pre><br />
If you're getting hung by it and don't want to shut down, you can get your system moving again by doing this:<br />
<pre class="brush:shell">sudo pm-suspend
</pre><br />
The full solution is the removal of the offending driver: <br />
<pre class="brush:shell">sudo apt-get remove xserver-xorg-video-intel
</pre><br />
Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-89826100656803303782016-09-22T18:07:00.001+09:002016-09-23T22:21:36.148+09:00Rasberry pi samba and git server setupPurchase and assumable physical items<br />
<br />
* model 3 pi + case<br />
* 32gb mirco sd card and adapter<br />
* usb portable disk<br />
* a usb hub (the pi lacks sufficient power output to drive a usb powered hdd) <br />
<br />
<table><tbody>
<tr> <td><iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="//ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ac&ref=tf_til&ad_type=product_link&tracking_id=codeslimjim-20&marketplace=amazon&region=US&placement=B0077VDNYU&asins=B0077VDNYU&linkId=d0d40aff0b7e166eadac366ec75fa577&show_border=true&link_opens_in_new_window=true&price_color=333333&title_color=0066c0&bg_color=ffffff" style="height: 240px; width: 120px;"><br />
</iframe></td> <td><br />
<iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="//ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ac&ref=tf_til&ad_type=product_link&tracking_id=codeslimjim-20&marketplace=amazon&region=US&placement=B01C6EQNNK&asins=B01C6EQNNK&linkId=00c69154973046f4551cefe06b18d5ff&show_border=true&link_opens_in_new_window=true&price_color=333333&title_color=0066c0&bg_color=ffffff" style="height: 240px; width: 120px;"><br />
</iframe></td> <td><br />
<iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="//ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ac&ref=tf_til&ad_type=product_link&tracking_id=codeslimjim-20&marketplace=amazon&region=US&placement=B00TKFEE5S&asins=B00TKFEE5S&linkId=8db98cb9eca0a57e6362e72b75d42272&show_border=true&link_opens_in_new_window=true&price_color=333333&title_color=0066c0&bg_color=ffffff" style="height: 240px; width: 120px;"><br />
</iframe></td> </tr>
</tbody></table><br />
<h3>Setup Jesse on the micro sd</h3>Note more info at: <a href="https://www.raspberrypi.org/documentation/installation/installing-images/">https://www.raspberrypi.org/documentation/installation/installing-images/</a><br />
<br />
1) Download the RASPBIAN JESSIE LITE .img<br />
<a href="https://www.raspberrypi.org/downloads/raspbian/">https://www.raspberrypi.org/downloads/raspbian/</a><br />
<br />
2) Get sd card mount point<br />
<br />
<pre class="brush:shell">df -h | egrep "mmc|sdd"
</pre><br />
<pre class="brush:shell">/dev/mmcblk0p1 30G 32K 30G 1% /media/kage/3535-3133
</pre><br />
3) umount so the card so it can be imaged<br />
<br />
<pre class="brush:shell">umount /dev/mmcblk0p1
</pre><br />
4) write img to sd card<br />
<br />
<pre class="brush:shell">sudo dd bs=4M if=~/Downloads/2016-05-27-raspbian-jessie-lite.img of=/dev/mmcblk0
sync
</pre><br />
5) correct sd card partition back to max size<br />
<br />
<pre class="brush:shell">sudo parted /dev/mmcblk0
</pre><br />
<pre class="brush:shell">(parted) p free
</pre><br />
<pre class="brush:shell">Model: SD SL32G (sd/mmc)
Disk /dev/mmcblk0: 31.9GB
Sector size (logical/physical): 512B/512B
Partition Table: msdos
Disk Flags:
Number Start End Size Type File system Flags
32.3kB 4194kB 4162kB Free Space
1 4194kB 70.3MB 66.1MB primary fat16 lba
2 70.3MB 31.9GB 31.8GB primary ext4
31.9GB 31.9GB 15.0MB Free Space
</pre><br />
<pre class="brush:shell">(parted) resizepart 2 32G
(parted) p free
</pre><br />
<pre class="brush:shell">Model: SD SL32G (sd/mmc)
Disk /dev/mmcblk0: 31.9GB
Sector size (logical/physical): 512B/512B
Partition Table: msdos
Disk Flags:
Number Start End Size Type File system Flags
32.3kB 4194kB 4162kB Free Space
1 4194kB 70.3MB 66.1MB primary fat16 lba
2 70.3MB 31.9GB 31.8GB primary ext4
</pre><br />
6) Eject card and reinsert. confirm mounting and sizes are ok<br />
<br />
<h3>setup passwordless ssh for pi user (assuming the machine your on is the accessor) </h3><br />
1) cd to the mounted sd card and into the pi home dir<br />
<br />
<pre class="brush:shell">cd /media/sd-card-uuid/
pushd .
cd home/pi
</pre><br />
2) setup .ssh with the working machines publish ssh info<br />
<pre class="brush:shell">mkdir .ssh
chmod 0700 .ssh
touch .ssh/authorized_keys
chmod 600 .ssh/authorized_keys
cat ~/.ssh/id_rsa.pub >> .ssh/authorized_keys
</pre><br />
<h3>setup static ip</h3><br />
1) cd to the mounted sd card and into the /etc/networks area<br />
<pre class="brush:shell">popd .
</pre><br />
2) modify the interface setup<br />
<pre class="brush:shell">sudo vi etc/network/interface
</pre><br />
3) setup a static hard line ip im using 192.168.1.175 feel free to adjust as needed<br />
<pre class="brush:shell">iface eth0 inet static
address 192.168.1.175
netmask 255.255.255.0
gateway 192.168.1.1
network 192.168.1.0
broadcast 192.168.1.255
</pre><br />
3b) OR setup the wifi (i would not advise this)<br />
<pre class="brush:shell">sudo vi etc/wpa_supplicant/wpa_supplicant.conf
</pre><br />
<pre class="brush:shell"># add this at the end
network={
ssid="network id"
psk="network password"
}
</pre><br />
<h3>finish up raw img adjustments</h3><br />
1) sync and umount card and then eject the card<br />
<br />
<pre class="brush:shell">sync
umount /dev/mmcblk0p1
</pre><br />
2) Install sd card in the pi3. You insert the micro card into the card reader on the underside of the pi3 board, face out. <br />
Power it up and check that it seems to booted <br />
<br />
3) Now in back your work machine find out where it went <br />
<br />
<pre class="brush:shell">nmap -sP 192.168.1.0/24
</pre><br />
4) If it booted at the correct address then great otherwise check your router setups <br />
<br />
<h3>check access and secure the default password</h3><br />
1) ssh into the pi. Note the default password is "raspberry" but you shouldnt need it your ssh key is the access method.<br />
<br />
<pre class="brush:shell">ssh pi@192.168.1.175
</pre><br />
2) Scramble the default password (I use a software vault, keepassx, to generate and store passwords) this way if someone gets in they cant just sudo with the default password and get root as well<br />
<br />
<pre class="brush:shell">passwd
</pre><br />
<h3>clean up and secure the ssh access</h3><br />
1) Its an image so /etc/ssh identity files are already setup.. thats not very healthy lets rebuild them<br />
<br />
<pre class="brush:shell">sudo rm -f /etc/ssh/*key*
sudo dpkg-reconfigure openssh-server
</pre><br />
2) Confirm the ssh is ok (for chmods etc)<br />
<br />
<pre class="brush:shell">ls -al ~/.ssh/authorized_keys
</pre><br />
3) Now secure the ssh access to keys only<br />
<br />
<pre class="brush:shell">sudo vi /etc/ssh/sshd_config
</pre><br />
4) Edit/add the following lines <br />
<br />
<pre class="brush:shell">PasswordAuthentication no
AllowTcpForwarding no
X11Forwarding no
</pre><br />
5) And confirm that ssh still works (with a second window).. Note if you make a mistake you can always turn off the pi, eject the sd card and edit it directly in your working PC.<br />
<br />
<h3>update machine and fix various other image guff</h3><br />
1) first update the images software.<br />
<br />
<pre class="brush:shell">sudo apt-get update
sudo apt-get upgrade
sudo apt-get dist-upgrade
</pre><br />
2) after that I seemed to be getting this mess with the locale being bad..<br />
<br />
<pre class="brush:shell">perl: warning: Please check that your locale settings:
LANGUAGE = (unset),
LC_ALL = (unset),
</pre><br />
3) so fix it with:<br />
<br />
<pre class="brush:shell">sudo dpkg-reconfigure locales
</pre><br />
<h3>Setup usb disk</h3>for more info refer to: <a href="http://www.howtogeek.com/139433/how-to-turn-a-raspberry-pi-into-a-low-power-network-storage-device/">http://www.howtogeek.com/139433/how-to-turn-a-raspberry-pi-into-a-low-power-network-storage-device/</a><br />
<br />
1) find out what usb disk we have attached<br />
<br />
<pre class="brush:shell">sudo blkid
</pre><br />
<pre class="brush:shell">/dev/sda1: LABEL="EC-PHU3" UUID="2E24054E24051B0B" TYPE="ntfs" PARTUUID="8418c874-01"
</pre><br />
2) Right looks like a ntfs (but it could have also been vfat etc). So setup ntfs drivers <br />
<br />
<pre class="brush:shell">sudo apt-get install ntfs-3g
</pre><br />
3) make the mount point<br />
<br />
<pre class="brush:shell">sudo mkdir /media/hdd
sudo chmod a+rwx /media/hdd
</pre><br />
4) And check that it can actually mount<br />
<br />
<pre class="brush:shell">sudo mount -t auto /dev/sda1 /media/hdd
</pre><br />
4a) ok.. stupid things are happening<br />
<br />
<pre class="brush:shell">modprobe: ERROR: ../libkmod/libkmod.c:557 kmod_search_moddep() could not open moddep file '/lib/modules/4.4.11-v7+/modules.dep.bin'
ntfs-3g-mount: fuse device is missing, try 'modprobe fuse' as root
</pre><br />
4b) well thats because.. it has the wrong /lib/modules version installed!!.. wtf!<br />
<br />
<pre class="brush:shell">ls /lib/modules/4.4.13-v7+/
</pre><br />
4c) So after googling for a while turns out there is a surprising fix... most likely it was something in that first apt-get update cycle caused all the modules to move forward!<br />
<br />
<pre class="brush:shell">sudo reboot
</pre><br />
<pre class="brush:shell">pi@raspberrypi:/lib/modules $ uname -a
Linux raspberrypi 4.4.13-v7+ #894 SMP Mon Jun 13 13:13:27 BST 2016 armv7l GNU/Linux
</pre><br />
4d) But something crazy happened to the ntfs-3g had to install it again??? no idea why..<br />
<br />
<pre class="brush:shell">sudo apt-get install ntfs-3g
</pre><br />
5) now mount and test access<br />
<br />
<pre class="brush:shell">sudo mount -t ntfs-3g -o uid=pi,gid=pi /dev/sda1 /media/hdd
cd /media/hdd
touch abc.txt
ls
rm /media/hdd/abc.txt
</pre><br />
6) if that worked and u can see the test file move on otherwise start googling.. <br />
<br />
<h3>Setup usb disk at permanent location </h3><br />
1) now we are going to make the usb disk a permanent fixture, umount it<br />
<br />
<pre class="brush:shell">sudo umount /media/hdd
</pre><br />
2) update /etc/fstab. We are going make all files owned by user pi and group users, read and writable <br />
extra info at <br />
* <a href="https://www.howtoforge.com/reducing-disk-io-by-mounting-partitions-with-noatime">https://www.howtoforge.com/reducing-disk-io-by-mounting-partitions-with-noatime</a><br />
* <a href="http://raspi.tv/2012/how-to-mount-and-use-a-usb-hard-disk-with-the-raspberry-pi">http://raspi.tv/2012/how-to-mount-and-use-a-usb-hard-disk-with-the-raspberry-pi</a><br />
* <a href="http://www.omaroid.com/fstab-permission-masks-explained/">http://www.omaroid.com/fstab-permission-masks-explained/</a><br />
* note nobootwait doesnt work???<br />
<br />
<pre class="brush:shell">sudo vi etc/fstab
</pre><br />
2a) and add the following line<br />
<pre class="brush:shell">/dev/sda1 /media/hdd ntfs-3g noatime,umask=0,uid=pi,gid=users,dmask=0007,fmask=0117 0 0
</pre><br />
2b) or you can use something like:<br />
<pre class="brush:shell">/dev/sda1 /media/hdd vfat uid=pi,gid=users,umask=0022,sync,auto,nosuid,rw,nouser 0 0
</pre><br />
3) and then mount it<br />
<pre class="brush:shell">sudo mount /dev/sda1
</pre><br />
4) test access <br />
<br />
<pre class="brush:shell">touch /media/hdd/abc.txt
rm /media/hdd/abc.txt
</pre><br />
<h3>setup samba for remote share</h3><br />
1) ok build samba share location...<br />
<br />
<pre class="brush:shell">mkdir /media/hdd/share
</pre><br />
2) install samba<br />
<br />
<pre class="brush:shell">sudo apt-get install samba samba-common-bin libpam-smbpass
</pre><br />
3) Configure samba<br />
<br />
<pre class="brush:shell">sudo cp /etc/samba/smb.conf /etc/samba/smb.conf.20160922
sudo vi /etc/samba/smb.conf
</pre><br />
4) insert or uncomment the following in the Authorization section<br />
<br />
<pre class="brush:shell">security = user
unix password sync = yes
</pre><br />
5) comment out homes,printers sections.. its junk we dont want or need<br />
<br />
6) insert "share" section<br />
<br />
<pre class="brush:shell">[Share]
comment = 1TB_samba
path = /media/hdd/share
valid users = remote
read only = No
</pre><br />
7) bounce the samba service<br />
<br />
<pre class="brush:shell">sudo /etc/init.d/samba restart
</pre><br />
8) check its setup <br />
<br />
<pre class="brush:shell">testparm -s
</pre><br />
9) now in the working pc with what ever file manager u use "browse" your network and locate the pi likely called "raspberrypi" open it and confirm there are no "printers" stuff and "share" is visible<br />
<br />
10) try to access the "share" using "guest" access, then as the user "pi" with the default password, your new password and confirm all of this has no access<br />
<br />
11) back in the pi. Make the "remote" user and passwd it. Again password vault scramble a password for it.. but make certain its type-able <br />
For more info refer to <a href="http://raspi.tv/2012/how-to-create-a-new-user-on-raspberry-pi">http://raspi.tv/2012/how-to-create-a-new-user-on-raspberry-pi</a><br />
<br />
<pre class="brush:shell">sudo adduser remote
sudo usermod -a -G users remote
groups remote
sudo smbpasswd -e remote
</pre><br />
12) then bounce samba<br />
<br />
<pre class="brush:shell">sudo /etc/init.d/samba restart
</pre><br />
13) then back in the working PC retest the samba share using the user "remote" and their unix password and confirm file/dir create/delete etc. Also check the guest and "pi" users again to be certain..<br />
<br />
14) then reboot and check mount stays put and samba works over the reboot<br />
<br />
<pre class="brush:shell">sudo reboot
</pre><br />
<h3>Setup the git ssh server</h3><br />
1) make the repos storage location <br />
<br />
<pre class="brush:shell">mkdir /media/hdd/repos
</pre><br />
2) install git<br />
<br />
<pre class="brush:shell">sudo apt-get install git-core
</pre><br />
3) add a git user<br />
<br />
<pre class="brush:shell">sudo adduser git
sudo usermod -a -G users git
groups git
</pre><br />
4) switch over to the new "git" user<br />
<br />
<pre class="brush:shell">#cat *pi* users access keys first so u can copy it later for step 6
cat .ssh/authorized_keys
</pre><br />
<pre class="brush:shell">su git
cd ~
</pre><br />
5) confirm sudo limits.. (should fail)<br />
<br />
<pre class="brush:shell">sudo ls
</pre><br />
6) setup the "git" users ssh access <br />
<br />
<pre class="brush:shell">mkdir .ssh
chmod 0700 .ssh
touch .ssh/authorized_keys
chmod 600 .ssh/authorized_keys
vi ~/.ssh/authorized_keys
</pre><br />
7) from your working pc confirm ssh access<br />
<br />
<pre class="brush:shell">ssh git@192.168.1.175
</pre><br />
8) link the repos storage to the git users home<br />
<br />
<pre class="brush:shell">ln -s /media/hdd/repos repos
</pre><br />
9) create a test repo<br />
<br />
<pre class="brush:shell">cd ~/repos
git init --bare test.git
</pre><br />
9a) Note ignore chmod errors (we have it forced to a certain way in fstab)<br />
<pre class="brush:shell">error: chmod on /media/hdd/repos/test.git/config.lock failed: Operation not permitted
error: chmod on /media/hdd/repos/test.git/config.lock failed: Operation not permitted
</pre><br />
Note to self. There might be an issue with executable scripts losing x permission due to this...may have to rethink this.<br />
<br />
10) Then on your working machine clone and check that the repo worked<br />
<br />
<pre class="brush:shell">git clone git@<hostname ip="" or="">:~/repos/test.git
cd test/
ls
touch test.txt
git add test.txt
git commit -a -m "Initial"
git push
</hostname></pre><br />
11) then reclone and confirm test.txt is there in the new clone<br />
<br />
<pre class="brush:shell">cd ..
git clone git@192.168.1.4:~/repos/test.git test2
cd test2
ls
</pre><br />
<h3>setup automatic updating </h3><br />
now we know all the basics are working... lets harden it a bit<br />
<br />
1) setup auto updates<br />
for more info see <a href="https://unixsysdoc.wordpress.com/2015/05/18/xubuntu-security-and-hardening-part-i-basics/">https://unixsysdoc.wordpress.com/2015/05/18/xubuntu-security-and-hardening-part-i-basics/</a><br />
<br />
<pre class="brush:shell">sudo apt-get install unattended-upgrades
sudo dpkg-reconfigure -plow unattended-upgrades
</pre><br />
<h3>disable needless hardware</h3><br />
1) disable wifi and bluetooth (as im using the hard line)<br />
<br />
<pre class="brush:shell">sudo touch /etc/modprobe.d/disable_rpi3_wifi_bt.conf
sudo vi /etc/modprobe.d/disable_rpi3_wifi_bt.conf
</pre><br />
2) add the lines<br />
<br />
<pre class="brush:shell">##wifi
blacklist brcmfmac
blacklist brcmutil
##blue tooth
blacklist btbcm
blacklist hci_uart
</pre><br />
3) and reboot <br />
<br />
<pre class="brush:shell">sudo reboot
</pre><br />
4) log back in to the pi and confirm wifi is off<br />
<br />
<pre class="brush:shell">ifconfig
</pre><br />
<h3>setup the firewall</h3>for more info check; <a href="https://www.digitalocean.com/community/tutorials/how-to-setup-a-firewall-with-ufw-on-an-ubuntu-and-debian-cloud-server">https://www.digitalocean.com/community/tutorials/how-to-setup-a-firewall-with-ufw-on-an-ubuntu-and-debian-cloud-server</a><br />
<br />
1) install ufw.. its an easy to use firewall<br />
<br />
<pre class="brush:shell">sudo apt-get install ufw
</pre><br />
2) let in local ssh<br />
<br />
<pre class="brush:shell">sudo ufw allow ssh
</pre><br />
3) let in local samba (and bonjour)<br />
for more info check: <a href="https://ubuntuforums.org/showthread.php?t=806000">https://ubuntuforums.org/showthread.php?t=806000</a><br />
<br />
<pre class="brush:shell">sudo ufw allow proto tcp to any port 135 from 192.168.0.0/16
sudo ufw allow proto udp to any port 137 from 192.168.0.0/16
sudo ufw allow proto udp to any port 138 from 192.168.0.0/16
sudo ufw allow proto tcp to any port 139 from 192.168.0.0/16
sudo ufw allow proto tcp to any port 445 from 192.168.0.0/16
sudo ufw allow proto udp to any port 5353 from 192.168.0.0/16
</pre><br />
4) and turn it on<br />
<br />
<pre class="brush:shell">sudo ufw enable
</pre><br />
5) then retest all the samba access and git cloning<br />
<br />
<h3>Setup the hostname to something better</h3><br />
1) adjust the hostname so you know which machine your in replace "raspberrypi" with the name you want<br />
<br />
<pre class="brush:shell">sudo vi /etc/hostname
sudo vi /etc/hosts
</pre><br />
2) and reboot<br />
<br />
<pre class="brush:shell">sudo reboot
</pre><br />
<h3>Clean up </h3><br />
1) clean up the working pc git clones<br />
<br />
<pre class="brush:shell">rm -rf test
rm -rf test2
</pre><br />
2) go back to the pi clean out the test and import/setup your real ones<br />
<br />
<pre class="brush:shell">rm test.git
</pre><br />
<h3>bonus notes.. </h3><br />
1) i trialed wake on lan<br />
<br />
<pre class="brush:shell">sudo apt-get install ethtool
ethtool eth0 | grep wake
ethtool -s eth0 wol g
</pre><br />
# it responds with bad news <br />
<pre class="brush:shell">Cannot get wake-on-lan settings: Operation not permitted
</pre><br />
Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-6616810737876763742016-08-13T14:53:00.000+09:002016-09-23T22:29:23.471+09:00google interview: efficiently find a number in a partial sorted list <pre class="brush:cpp">// http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer
// You are given a list of numbers. When you reach the end of the list you
// will come back to the beginning of the list (a circular list). Write the
// most efficient algorithm to find the minimum # in this list. Find any given
// # in the list. The numbers in the list are always increasing but you
// don’t know where the circular list begins, ie: 38, 40, 55, 89, 6, 13, 20,
// 23, 36.
// First of all note very carefully this is for a *unidirectional circular linked
// list* if we alter the design of the data structure there are much better ways
// to do this.. but i believe that is out of bounds as a potential answer.
// hmm.. essentially the find min number algorithm is a component of the find
// any number algorithm.. Since the question isn't written as find the min,
// then fix the list start and then find any # ill assume that it was meant as
// a helper step to get to the find algorithm.. so in the interest of getting
// it done lets just cut to the find any # algorithm and code a few evolutions
// showing efficiency improvements..
#include <iostream>
#include <vector>
#include <chrono>
#include <functional>
// ****************************************
// *************** CYCLIC LIST ************
// ****************************************
// A fixed-content circular ("cyclic") list over a vector, with a logical
// start offset so the sorted-but-rotated test data can begin anywhere.
// Iterators wrap around the end transparently.
class List
{
    std::vector<int> data_;
    int offset_;        // index of the logical beginning of the circle
public:
    class Iterator
    {
        List* list_;    // non-owning; NULL means "invalid / not found"
        int index_;
    public:
        // default-constructed iterator is the "not found" sentinel
        Iterator() :
            list_(NULL),
            index_(-1)
        {}
        // iterator at the list's logical beginning
        Iterator(List* list) :
            list_(list),
            index_(list->offset_)
        {}
        // WARNING DO NOT USE in find.. that was not spec'ed
        // this is for testing only
        Iterator(List* list, int offset) :
            list_(list),
            index_(offset)
        {}
        Iterator& operator++()
        {
            index_ = (index_ + 1) % static_cast<int>(list_->data_.size());
            return *this;
        }
        Iterator operator+(int delta)
        {
            // BUGFIX: data_.size() is unsigned, so a negative (index_ + delta)
            // used to be converted to a huge unsigned value before '%'; the
            // "while (offset < 0)" correction below was dead code and negative
            // deltas landed on the wrong element. Do the modulo in signed
            // arithmetic instead.
            int size = static_cast<int>(list_->data_.size());
            int offset = (index_ + delta) % size;
            while (offset < 0) offset += size;
            return Iterator(list_, offset);
        }
        // WARNING DO NOT USE in find.. that was not spec'ed
        // this is for testing only
        Iterator& operator--()
        {
            index_ = (static_cast<int>(list_->data_.size()) + index_ - 1)
                % static_cast<int>(list_->data_.size());
            return *this;
        }
        int operator*()
        {
            return list_->data_[index_];
        }
        bool operator==(const Iterator& other) const
        {
            return other.list_ == list_ and
                other.index_ == index_;
        }
        bool operator!=(const Iterator& other) const
        {
            return not (other == *this);
        }
        // true when the iterator refers to a list (ie is not the sentinel)
        bool valid() const
        {
            return list_ != NULL;
        }
    };
    // explicit content with an optional rotation offset
    List(std::initializer_list<int> data,
         int offset = 0) :
        data_(data),
        offset_(offset)
    {}
    // random strictly-increasing content with a random rotation offset.
    // BUGFIX: guard against size <= 0, which previously evaluated
    // rand() % 0 (undefined behaviour).
    List(int size) :
        data_(size > 0 ? size : 0),
        offset_(size > 0 ? rand() % size : 0)
    {
        // random fill: each value is 2..6 bigger than the previous one,
        // so the data is strictly increasing before rotation
        int value = 0;
        for (int i = 0; i < size; ++i)
        {
            value += 2 + rand() % 5;
            data_[i] = value;
        }
    }
    bool empty() const
    {
        return data_.empty();
    }
    // iterator at the logical beginning (the rotation point)
    Iterator begin()
    {
        return Iterator(this);
    }
    // iterator at a uniformly random position (testing helper)
    Iterator random()
    {
        return Iterator(this, rand() % data_.size());
    }
};
// ****************************************
// *************** FIND NAIVE *************
// ****************************************
// Brute-force scan: walk the full circle once from the logical beginning
// and return an iterator to the first element equal to 'target', or a
// default (invalid) iterator when the list is empty or the value is absent.
List::Iterator find_naive(List& data, int target)
{
    if (data.empty()) return List::Iterator();
    const List::Iterator anchor = data.begin();
    List::Iterator cursor = anchor;
    for (;;)
    {
        if (*cursor == target) return cursor;
        ++cursor;
        if (cursor == anchor) break;   // back at the start: full lap done
    }
    return List::Iterator();
}
// ****************************************
// ************* FIND NAIVE V2 ************
// ****************************************
// Attempt to simply the code a bit so that we handle the cyclic iteration and
// partial sorted issue cleaner for latter scaling
// Same linear scan as find_naive, with the loop shape simplified: the
// logical beginning is compared once up front, after which it serves purely
// as the end-of-lap sentinel for a walk that starts one element past it.
List::Iterator find_naive2(List& data, int target)
{
    if (data.empty()) return List::Iterator();
    List::Iterator sentinel = data.begin();
    if (*sentinel == target) return sentinel;
    // march from the element after the sentinel until we either hit the
    // target or complete the lap
    List::Iterator walker = sentinel + 1;
    while (walker != sentinel && *walker != target) ++walker;
    return (*walker == target) ? walker : List::Iterator();
}
// Of course you could try a divide and conquer idea.. but your dealing with a
// list that you want to divide in half. Dividing it it half means you need to
// step the iterator the mid point.. So assuming you know the size of the list
// the total search on a fully sorted list will *always* cost u n/2 + n/4 + n/8
// ... moves. Which totals to n-1 (ie if n=8 is 4+2+1=7).. So the performance
// is actually worst on average for the naive version ... of course this might
// not be true because the cost of iterators might be very low compared to the
// compare.. but i dont think it is... so im skipping it..
// ****************************************
// ******** SKIP TO SORTED SECTION ********
// ****************************************
// What we can really do is try to take advantage of the sorted order of data
// and terminate early.. In order to do that what you have to do is skip to
// the correct sorted part for your target and then search... (hence the find
// min number stuff before)
// Keep in mind this gets much less robust to problems with the data.. ie
// if the list isnt sort (and parted) before hand..
// Exploit the two sorted runs of the rotated list: if the target is smaller
// than the first element it must live in the wrapped (second) run, so
// fast-forward past the first run before doing the ordered scan, which can
// then terminate as soon as values pass the target.
List::Iterator find_skip_start(List& data, int target)
{
    if (data.empty()) return List::Iterator();
    // the logical start doubles as the end-of-lap sentinel
    List::Iterator sentinel = data.begin();
    int firstValue = *sentinel;
    if (firstValue == target) return sentinel;
    List::Iterator walker = sentinel + 1;
    if (target < firstValue)
    {
        // target belongs to the second run: skip the first run, which is
        // recognizable by values still >= the start value
        while (*walker >= firstValue && walker != sentinel) ++walker;
    }
    // ordered scan within the run that can hold the target
    while (*walker < target && walker != sentinel) ++walker;
    return (*walker == target) ? walker : List::Iterator();
}
// ****************************************
// ******* DOUBLE SKIP IN SECTION *********
// ****************************************
// Now because its sorted you dont have to check items every time you move
// forward. You can step and then roll back if you went to far.. this makes
// some big assumptions about the cost of iterator movement vs integer
// compares.. my "List" class isnt great but it works out.. and in
// theory should cut out every second compare..
// Walk in strides of two, remembering the stride midpoint (iprev) so a
// one-element overshoot can be rolled back; in theory this halves the number
// of value comparisons at the cost of extra iterator moves.
List::Iterator find_double_skip(List& data, int target)
{
    // handle the empty dataset case..
    if (data.empty()) return List::Iterator();
    //termination is an issue because start *is* the end ..
    // simplify this by checking the start..
    List::Iterator end = data.begin();
    if (target == *end) return end;
    // then start at the first item after and now "start" is really the end
    List::Iterator i = end + 1;
    if (target < *end)
    {
        int split_edge = *end;
        // double skip to second half
        List::Iterator iprev;
        while (*i >= split_edge and i != end)
        {
            ++i;
            iprev = i;            // midpoint of this stride (not compared)
            if (i != end) ++i;    // second step of the stride
        }
        // check if we need to back up the double step.
        // (if the uncompared midpoint already crossed into the second half,
        // it is the first element of that half)
        if (iprev.valid() and *iprev < split_edge) i = iprev;
    }
    {
        // double skip till target found or exceeded
        List::Iterator iprev;
        while (*i < target and i != end)
        {
            ++i;
            iprev = i;            // midpoint skipped without a compare
            if (i != end) ++i;
        }
        // check if found the target at either of the pointers..
        // (the midpoint was never compared inside the loop, so test it first)
        if (iprev.valid() and *iprev == target) return iprev;
        if (*i == target) return i;
    }
    return List::Iterator();
}
// ****************************************
// ********** N SKIP IN SECTION ***********
// ****************************************
// Optimization.. of course if u can do better with steps of 2 then u can
// generalize it to any number of steps.. for example using steps of 20.. now
// we are really playing balance games.. trading the speeds of various
// comparisons, assignments etc.. Then u have to tune the size of skip for
// your machine(s)
// Generalize the stride idea to 'skip' elements per probe: only every
// skip-th element is compared; on overshoot, fall back to the last verified
// probe point (iprev) and finish with a one-by-one scan.
List::Iterator find_n_skip(List& data, int target)
{
    const int skip = 20;   // stride length; needs tuning per machine
    // handle the empty dataset case..
    if (data.empty()) return List::Iterator();
    //termination is an issue because start *is* the end ..
    // simplify this by checking the start..
    List::Iterator end = data.begin();
    if (target == *end) return end;
    // then start at the first item after and now "start" is really the end
    List::Iterator i = end + 1;
    if (target < *end)
    {
        List::Iterator iprev = i;
        int split_edge = *end;
        // skip to second half
        while (*i >= split_edge and i != end)
        {
            ++i;
            iprev = i;                             // first step is remembered
            int steps = skip;
            while (--steps > 0 and i != end) ++i;  // stride without comparing
        }
        // when found return to the last known ok point and search forward 1 by 1
        i = iprev;
        while (*i >= split_edge and i != end) ++i;
    }
    {
        List::Iterator iprev = i;
        while (*i < target and i != end)
        {
            ++i;
            iprev = i;                             // last reachable point
            int steps = skip;
            while (--steps > 0 and i != end) ++i;
        }
        // when found return to the last known ok point and search forward 1 by 1
        i = iprev;
        while (*i < target and i != end) ++i;
        if (*i == target) return i;
    }
    return List::Iterator();
}
void test(std::string dataTag,
std::string algoTag,
std::function<List::Iterator (List& , int)> func,
List& data,
int target,
bool hasTarget)
{
auto start = std::chrono::system_clock::now();
List::Iterator result = func(data, target);
auto end = std::chrono::system_clock::now();
std::cout << algoTag << " for " << dataTag << ": ";
if (not result.valid())
std::cout << (hasTarget ? "FAIL" : "pass");
else
std::cout << ((hasTarget and (target == *result)) ? "pass" : "FAIL");
std::cout << " -- time:"
<< std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
<< " nSec" << std::endl;
}
void testSet(std::string tag,
List& data,
int target,
bool hasTarget)
{
test(tag, "naive ", find_naive, data, target, hasTarget);
test(tag, "naive2", find_naive2, data, target, hasTarget);
test(tag, "skip ", find_skip_start, data, target, hasTarget);
test(tag, "2xskip", find_double_skip, data, target, hasTarget);
test(tag, "nskip ", find_n_skip, data, target, hasTarget);
std::cout << "\n";
}
// Entry point: exercise every find variant on a tiny hand-built list and on
// a large random list, covering hit and miss cases near the start, the end
// and at random positions.
int main()
{
    // seed rand so each run gets a different random list and rotation offset
    srand(std::chrono::system_clock::now().time_since_epoch().count());
    {
        // small rotated list: values 10..12 then 1..8, rotation at index 3
        List list({10,11,12,1,2,3,4,5,6,7,8});
        testSet("small ", list, 4, true);
    }
    {
        // big random list: values step by 2..6, so value-1 of any element
        // can never occur in the data (used for the miss cases)
        List list(1000000);
        testSet("big rand ", list, *list.random(), true);
        testSet("big nrand ", list, *list.random()-1, false); // note that random always is +2 ~ +7 so -1 means i will not be in the data..
        testSet("big early ", list, *(list.begin() + 10), true); // i hope.. its random so it might not be..
        testSet("big nearly", list, *(list.begin() + 10)-1, false); // check early exit in first half on fail
        testSet("big last ", list, *(--list.begin()), true);
        testSet("big nlast ", list, -1, false);
    }
}
</pre><br />
<br />
<pre>naive for small : pass -- time:3207 nSec
naive2 for small : pass -- time:2175 nSec
skip for small : pass -- time:1996 nSec
2xskip for small : pass -- time:2151 nSec
nskip for small : pass -- time:4232 nSec
naive for big rand : pass -- time:14229998 nSec
naive2 for big rand : pass -- time:13228995 nSec
skip for big rand : pass -- time:12786059 nSec
2xskip for big rand : pass -- time:10874261 nSec
nskip for big rand : pass -- time:9004453 nSec
naive for big nrand : pass -- time:43750054 nSec
naive2 for big nrand : pass -- time:37382249 nSec
skip for big nrand : pass -- time:17939681 nSec
2xskip for big nrand : pass -- time:15509062 nSec
nskip for big nrand : pass -- time:12927864 nSec
naive for big early : pass -- time:738 nSec
naive2 for big early : pass -- time:611 nSec
skip for big early : pass -- time:632 nSec
2xskip for big early : pass -- time:708 nSec
nskip for big early : pass -- time:1064 nSec
naive for big nearly: pass -- time:28980196 nSec
naive2 for big nearly: pass -- time:27551947 nSec
skip for big nearly: pass -- time:1034 nSec
2xskip for big nearly: pass -- time:719 nSec
nskip for big nearly: pass -- time:1217 nSec
naive for big last : pass -- time:27341796 nSec
naive2 for big last : pass -- time:27780499 nSec
skip for big last : pass -- time:28165415 nSec
2xskip for big last : pass -- time:24593599 nSec
nskip for big last : pass -- time:22455148 nSec
naive for big nlast : pass -- time:27195820 nSec
naive2 for big nlast : pass -- time:26584976 nSec
skip for big nlast : pass -- time:8640245 nSec
2xskip for big nlast : pass -- time:7304119 nSec
nskip for big nlast : pass -- time:6601818 nSec
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-89936308120012802502016-08-09T00:25:00.001+09:002016-08-09T00:51:06.816+09:00google interview: measure the speed of a context switch<pre class="brush:cpp">// http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer
// Write a C program which measures the the speed of a context switch on a
// UNIX/Linux system.
// wow.. just wow... the questioner hates you.. so moving on...
// A context switch occurs when the kernel or hardware thinks its time to do
// something else... you have no control of it.. *period*... the only true
// way to measure it is to get into the kernel remove *everything* and check it
// directly.. without running any other programs and device drivers etc, i
// mean hell painting the screen, that blinking cursor your looking at while
// typing thats *all* a context switch.
// so write code to measure this.. well.. i dont know the kernel so thats out..
// so i dont have the skills to do the *real* job.. the best i can do is guess..
// basically i can measure the whole process: program executing, ctx switch, do
// something else, ctx switch, complete execution
// so we are going to try to measure something that should have very little
// variation and should scale well.. ie multiply and accumulate a register.
// we will do the ops few times and profile them. Then change the total number
// of times we do it and reprofile.. of course smaller context switches may
// be getting buried in the testing method etc.. not to mention the timers
// effects etc etc
// what we are looking for is a statistical blip... the mult accum should just
// shift right when we scale it.. anything that stays still isnt part of our
// test.. its likely to be some part of the kernels context switching or device
// drivers
// Anyway results looked as expected.. the test execution time moved to the
// right when the number of cycles was increased.. there is a set of a large
// reoccurring spikes in the execution times.. but i doubt these are the cost
// of a context switch, more likely some device driver, service, interrupt
// handlers bumping into my execution etc
#include <iostream>
#include <chrono>
#include <map>
// Profile a small fixed multiply-accumulate workload a very large number of
// times and histogram the per-run durations; spikes that do not shift when
// the workload is scaled hint at context switches / kernel interference
// (see the discussion above).
int main()
{
    // histogram: duration in nanoseconds -> number of runs with that duration
    std::map<int,int> counts;
    for (int k = 0; k < 100; ++k)
    {
        std::cout << ".";  // progress marker, one dot per outer batch
        for (int j = 0; j < 1000000; ++j)
        {
            auto start = std::chrono::system_clock::now();
            // BUGFIX: the original inner loop did "i += i*i" on its own loop
            // counter, so it exited after ~6 iterations (not the written
            // 100000) and eventually overflowed signed int, which is
            // undefined behaviour. Use a separate volatile accumulator so
            // the probe is well-defined and cannot be optimized away; the
            // probe size is kept small and is the knob to turn when
            // rescaling the experiment.
            const int kProbeOps = 8;
            volatile long long acc = 0;
            for (int i = 1; i <= kProbeOps; ++i)
            {
                acc += static_cast<long long>(i) * i;
            }
            auto end = std::chrono::system_clock::now();
            int delta = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
            // operator[] default-initializes a missing entry to 0, so a
            // single lookup replaces the old find-then-insert dance
            ++counts[delta];
        }
    }
    std::cout << "\n";
    for (const auto& bucket : counts)
    {
        std::cout << " " << bucket.first << ":" << bucket.second << "\n";
    }
}
</pre><br />
The results were copied out into Open Office and graphed etc. Note that the "fuzz" in the red section may be significant also.. but I'm not really a maths guy so I can't be certain, and my use of stats here is really flimsy, so grain of salt people... Update: I just noticed that I didn't align the graphs very well (this wasn't intentional); anyway the mistake is a bit disadvantageous to the results so I'll leave it be.<br />
<br />
<div class="separator" style="clear: both; text-align: center;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhMm5o8ZxAN0Sa5t-CLkI5aIhcb4XA9TNi7CMjw2tcMKOps4Ny6VIhmqyL8cA4guL39KbNJfbRCJDEjDzO4SOaKLru-N3eUXTZJT7nrGFfIOWshJwo5ejqhQxt3qFZavkjipyiOuHp20BQ/s1600/kernal_profile.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhMm5o8ZxAN0Sa5t-CLkI5aIhcb4XA9TNi7CMjw2tcMKOps4Ny6VIhmqyL8cA4guL39KbNJfbRCJDEjDzO4SOaKLru-N3eUXTZJT7nrGFfIOWshJwo5ejqhQxt3qFZavkjipyiOuHp20BQ/s400/kernal_profile.png" width="400" height="278" /></a></div>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-17961643137742529122016-08-06T14:31:00.001+09:002016-08-06T14:50:00.854+09:00google interview: detect a cycle in a graph<pre class="brush:cpp">// detect a cycle in a directed graph
#include <iostream>
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <memory>
#include <algorithm>
#include <functional>
#include <chrono>
// Minimal directed-graph node: a unique auto-assigned id plus a list of
// outgoing edges. Edges are non-owning raw pointers; the test graphs below
// keep all nodes on the stack.
struct Node
{
    static int gID;            // shared counter used to hand out unique ids
    int id_;                   // this node's unique id (1-based)
    std::vector<Node*> kids_;  // outgoing edges (duplicates allowed)
    Node() : id_(++gID) {}
    // add a directed edge this -> kid (kid's lifetime is owned by the caller)
    void link(Node& kid) { kids_.push_back(&kid); }
};
int Node::gID = 0;
// *************************************************
// *************** PRE BOARD WORK *****************
// *************************************************
//
// Basic loop with Node "R" going into unknown "Graph"
// +---+ << check for self link node in path
// ~~~~~~~~~ | |
// ( ) <--R<--+
// ) Graph ( ^
// ( ) -?-+
// ~~~~~~~~~
//
// Recursive case for "A" a child Node of "R"
// +---+---+ << check for prior node in path
// ~~~~~~~~~ | v |
// ( ) <--A<--R<--+
// ) Graph ( ^ ^
// ( ) -?-+-?-+
// ~~~~~~~~~
//
// Recursive case for "B" with child Node "A"
// +---+---+--+ << check for *ANY* prior node in path
// ~~~~~~~~~ | v v |
// ( ) <--B<--A<--R<-+
// ) Graph ( ^ ^ ^
// ( ) -?-+-?-+-?-+
// ~~~~~~~~~
//
// etc
//
// Therefore first algo is to DFS visit all nodes and check if current nodes
// kids loop back into the path of nodes before the current one.
// *************************************************
// ***************** BASIC RECURSE *****************
// *************************************************
// DFS helper: returns true iff a cycle is reachable from 'v'.
// 'path' holds the chain of nodes on the active DFS descent; finding a child
// that is already on that chain means we looped back onto ourselves.
bool hasCycle_recurseImpl(Node* v,
                          std::vector<Node*>& path)
{
    path.push_back(v);  // v joins the active descent
    for (Node* kid : v->kids_)
    {
        // back-edge into the active chain => cycle (short-circuits the recursion)
        bool onPath =
            std::find(path.begin(), path.end(), kid) != path.end();
        if (onPath || hasCycle_recurseImpl(kid, path))
            return true;
    }
    path.pop_back();    // v fully explored without closing a loop
    return false;
}
// Entry point: cycle detection via DFS with an ordered path vector.
// An empty graph (null root) trivially has no cycle.
bool hasCycle_recurse(Node* root)
{
    if (root == NULL)
        return false;
    std::vector<Node*> activeChain;
    return hasCycle_recurseImpl(root, activeChain);
}
// *************************************************
// ***************** UNORDERED PATH ****************
// *************************************************
// optimization of path - ordering is of the path is *not* important. As a
// result we can choose a data structure with O(1) search, insert and delete ie
// a hash
// DFS helper using an unordered_set for the active path: membership tests,
// inserts and erases are all O(1) on average, unlike the vector version
// whose membership test is a linear std::find.
bool hasCycle_unordPathImpl(Node* v,
                            std::unordered_set<Node*>& path)
{
    path.insert(v);  // v joins the active descent
    for (Node* kid : v->kids_)
    {
        // a child already on the active path closes a loop
        if (path.count(kid) != 0)
            return true;
        if (hasCycle_unordPathImpl(kid, path))
            return true;
    }
    path.erase(v);   // v exhausted with no cycle below it
    return false;
}
// Entry point: cycle detection via DFS with a hashed path set.
// An empty graph (null root) trivially has no cycle.
bool hasCycle_unordPath(Node* root)
{
    if (root == NULL)
        return false;
    std::unordered_set<Node*> activeChain;
    return hasCycle_unordPathImpl(root, activeChain);
}
// *************************************************
// ****************** NO REVISIT ******************
// *************************************************
// optimization of visiting - never revisit a node. The logic behind this is
// easy but it took me a bit to actually see it.
//
// If you visit a node then the DFS will terminate on 2 possible conditions.
// 1) you found a cycle,
// 2) you exhausted all possible nodes below this one and didn't end up in a
// cycle.
//
// Now here is what took me a bit to get.. If you ever enter a path that forms
// a cycle you have no possible way to exit unless you found end of the cycle
// that means that you can never see a visited node unless it is about to start
// a cycle or has been exhausted and will *never* form a cycle even if we add to
// the current path. IE if it could have formed a cycle with the current node
// then it should have traversed into the current node already before it was
// exhausted.
// DFS helper that additionally never re-enters an exhausted node: a node
// that was fully explored without yielding a cycle can never contribute one
// later, so revisiting it is pure waste (see the reasoning above).
bool hasCycle_1visitImpl(Node* v,
                         std::unordered_set<Node*>& visited,
                         std::unordered_set<Node*>& path)
{
    // already explored and exhausted -> provably cycle-free below here
    if (visited.count(v) != 0)
        return false;
    visited.insert(v);
    path.insert(v);   // v joins the active descent
    for (Node* kid : v->kids_)
    {
        // back-edge into the active descent => cycle
        if (path.count(kid) != 0)
            return true;
        if (hasCycle_1visitImpl(kid, visited, path))
            return true;
    }
    path.erase(v);    // v exhausted; stays in 'visited' forever
    return false;
}
// Entry point: cycle detection via DFS with a hashed path set plus a
// visited set that prevents ever re-exploring an exhausted node.
bool hasCycle_1visit(Node* root)
{
    if (root == NULL)
        return false;
    std::unordered_set<Node*> exhausted;
    std::unordered_set<Node*> activeChain;
    return hasCycle_1visitImpl(root, exhausted, activeChain);
}
// *************************************************
// *** CALL STACK, PATH and VISIT DUPLICATION *****
// *************************************************
// call stack, path hash and visit hash optimization -
// the recursive call stack is basically the same as the path stack and the path
// stack is the same as the visit stack all 3 data structures can be made into
// one.. using an iterative approach that stores the parent node to remove call
// stack, and a stateful hash entry to double as visit and child idx marking which
// kid is currently in use when in the path
// Iterative cycle detection. The recursive call stack, the path set and the
// visited set are folded into a single hash map:
//   node -> (parent in the DFS tree, index of the kid currently being tried)
// A node is "on the current path" while its kid index is >= 0; once the
// index goes negative the node is exhausted and can never start a cycle.
bool hasCycle_iterative(Node* root)
{
    typedef std::pair<Node*,int> ParentKidIdx;
    std::unordered_map<Node*, ParentKidIdx> path;
    if (root == NULL) return false;
    // note the oddity with a new node's current kid: we start with the end
    // kid; this way it naturally ends in a negative idx when kids are exhausted
    path[root] = ParentKidIdx(NULL, root->kids_.size());
    Node* current = root;
    while (current != NULL)
    {
        std::unordered_map<Node*, ParentKidIdx>::iterator pit
            = path.find(current);
        if (pit == path.end())
        {
            // BUGFIX: invariant violation — every node reached by the walk
            // is given a state entry before we step into it. The original
            // code printed this message and then fell through to dereference
            // the end iterator (undefined behaviour); bail out cleanly.
            std::cout << "IMPOSSIBLE! we have a node without a state\n";
            return false;
        }
        // just for ease of understanding: std::pair's member names suck..
        // and as u can see this is a bit confusing
        Node* parent = pit->second.first;
        int& kidIdx = pit->second.second; // note its *writable*
        // find the next kid that the current node should work on ..
        Node* newKid = NULL;
        while (kidIdx >= 0 and newKid == NULL)
        {
            // try to get the next kid
            --kidIdx;
            if (kidIdx >= 0)
            {
                // extract the kid
                newKid = current->kids_[kidIdx];
                // check if the kid has a state already
                std::unordered_map<Node*, ParentKidIdx>::iterator kit
                    = path.find(newKid);
                if (kit != path.end())
                {
                    // child has already been visited, but what state is it in?
                    if (kit->second.second >= 0)
                        // it is active and in the current path.. so this is
                        // a cycle
                        return true;
                    // it was visited and exhausted.. waste of time, move to
                    // the next one
                    newKid = NULL;
                }
                else
                {
                    // kid has never been visited before: make its init state
                    // entry and then visit it
                    path[newKid] = ParentKidIdx(current,
                                                newKid->kids_.size());
                }
            }
        }
        if (newKid != NULL)
        {
            // we found a new kid to explore.. jump into it
            current = newKid;
        }
        else
        {
            // we have exhausted all the current one's kids.. move up to the
            // parent
            current = parent;
        }
    }
    return false;
}
// *************************************************
// ******************* TEST ************************
// *************************************************
// Time one cycle-detection implementation on one graph and report whether
// its answer matches the expected result, plus the wall-clock time.
void testCase(std::string tag,
              std::function<bool(Node*)> hasCycle,
              Node* root,
              bool expected)
{
    auto before = std::chrono::system_clock::now();
    const bool actual = hasCycle(root);
    auto after = std::chrono::system_clock::now();
    auto nanos =
        std::chrono::duration_cast<std::chrono::nanoseconds>(after - before).count();
    std::cout << tag << ": "
              << (actual == expected ? "pass" : "FAIL")
              << " -- time:" << nanos
              << " nSec" << std::endl;
}
void testCaseSet(std::string set,
Node* root,
bool expected)
{
std::cout << "\n *** " << set << " ***\n";
testCase("hasCycle_recurse", hasCycle_recurse, root, expected);
testCase("hasCycle_unord ", hasCycle_unordPath, root, expected);
testCase("hasCycle_1visit ", hasCycle_1visit, root, expected);
testCase("hasCycle_iter ", hasCycle_iterative, root, expected);
}
// Build a series of graphs — empty, self loops, chains, diamonds and a
// wide+deep stress graph — and run the full implementation suite on each
// with the expected cycle/no-cycle answer.
void test()
{
    {
        // null root: the empty graph has no cycle by definition
        testCaseSet("null",
                    NULL, false);
    }
    {
        Node a0;
        testCaseSet("single node",
                    &a0, false);
    }
    {
        Node a0;
        a0.link(a0); // cycle
        testCaseSet("single node cycle",
                    &a0, true);
    }
    {
        Node a0;
        Node a1;
        a0.link(a1);
        testCaseSet("two node",
                    &a0, false);
    }
    {
        // parallel edge: reaching a1 twice is not a cycle
        Node a0;
        Node a1;
        a0.link(a1);
        a0.link(a1);
        testCaseSet("two node - double link",
                    &a0, false);
    }
    {
        Node a0;
        Node a1;
        a0.link(a1);
        a1.link(a0); // cycle
        testCaseSet("two node cycle",
                    &a0, true);
    }
    {
        // visit breaker
        // (diamond: a2 is reachable along two routes but there is no cycle;
        // exercises the visited/exhausted logic)
        Node a0;
        Node a1;
        Node a2;
        a0.link(a2);
        a0.link(a1);
        a1.link(a2);
        testCaseSet("triple node double end link - reversed",
                    &a0, false);
    }
    {
        // visit breaker
        // (same diamond with the link order flipped)
        Node a0;
        Node a1;
        Node a2;
        a0.link(a1);
        a0.link(a2);
        a1.link(a2);
        testCaseSet("triple node double end link",
                    &a0, false);
    }
    {
        Node a0;
        Node a1;
        Node a2;
        Node a3;
        a0.link(a2);
        a0.link(a1);
        a1.link(a2);
        a2.link(a0); // cycle..
        a2.link(a3);
        a3.link(a3); // cycle
        testCaseSet("some random tree",
                    &a0, true);
    }
    {
        // graph speed breaker wide first node with repeated long tail..
        // (100 shoulder routes all funnel into the same 100-node tail, so
        // implementations without a visited set re-walk the tail 100 times)
        Node head;
        Node chest;
        std::vector<Node> shoulders(100);
        // NOTE(review): 'int i' vs size() is a signed/unsigned comparison;
        // harmless at this size but worth cleaning up
        for (int i = 0; i < shoulders.size(); ++i)
        {
            head.link(shoulders[i]);
            shoulders[i].link(chest);
        }
        std::vector<Node> tail(100);
        chest.link(tail[0]);
        for (int i = 1; i < tail.size(); ++i)
        {
            tail[i-1].link(tail[i]);
        }
        testCaseSet("head shoulder tail",
                    &head, false);
        // closing the loop from the tail's end back to the head makes the
        // whole structure cyclic
        Node looper;
        tail[tail.size()-1].link(looper);
        looper.link(head);
        testCaseSet("head shoulder tail - looped",
                    &head, true);
    }
    std::cout << std::endl;
}
// Entry point: run the whole cycle-detection test suite.
int main()
{
    test();
}
</pre><br />
<pre>*** null ***
hasCycle_recurse: pass -- time:2574 nSec
hasCycle_unord : pass -- time:13314 nSec
hasCycle_1visit : pass -- time:4531 nSec
hasCycle_iter : pass -- time:4097 nSec
*** single node ***
hasCycle_recurse: pass -- time:4975 nSec
hasCycle_unord : pass -- time:12330 nSec
hasCycle_1visit : pass -- time:9873 nSec
hasCycle_iter : pass -- time:9279 nSec
*** single node cycle ***
hasCycle_recurse: pass -- time:4614 nSec
hasCycle_unord : pass -- time:8321 nSec
hasCycle_1visit : pass -- time:9863 nSec
hasCycle_iter : pass -- time:8335 nSec
*** two node ***
hasCycle_recurse: pass -- time:19714 nSec
hasCycle_unord : pass -- time:9582 nSec
hasCycle_1visit : pass -- time:13318 nSec
hasCycle_iter : pass -- time:11426 nSec
*** two node - double link ***
hasCycle_recurse: pass -- time:5507 nSec
hasCycle_unord : pass -- time:11714 nSec
hasCycle_1visit : pass -- time:13890 nSec
hasCycle_iter : pass -- time:11678 nSec
*** two node cycle ***
hasCycle_recurse: pass -- time:5299 nSec
hasCycle_unord : pass -- time:7683 nSec
hasCycle_1visit : pass -- time:12348 nSec
hasCycle_iter : pass -- time:9779 nSec
*** triple node double end link - reversed ***
hasCycle_recurse: pass -- time:7694 nSec
hasCycle_unord : pass -- time:13821 nSec
hasCycle_1visit : pass -- time:18194 nSec
hasCycle_iter : pass -- time:12800 nSec
*** triple node double end link ***
hasCycle_recurse: pass -- time:7322 nSec
hasCycle_unord : pass -- time:13999 nSec
hasCycle_1visit : pass -- time:16998 nSec
hasCycle_iter : pass -- time:12793 nSec
*** some random tree ***
hasCycle_recurse: pass -- time:4840 nSec
hasCycle_unord : pass -- time:7792 nSec
hasCycle_1visit : pass -- time:11853 nSec
hasCycle_iter : pass -- time:15752 nSec
*** head shoulder tail ***
hasCycle_recurse: pass -- time:18295450 nSec
hasCycle_unord : pass -- time:14848133 nSec
hasCycle_1visit : pass -- time:615861 nSec
hasCycle_iter : pass -- time:512595 nSec
*** head shoulder tail - looped ***
hasCycle_recurse: pass -- time:131233 nSec
hasCycle_unord : pass -- time:136192 nSec
hasCycle_1visit : pass -- time:242850 nSec
hasCycle_iter : pass -- time:176559 nSec
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-59577100740093972622016-08-03T22:37:00.000+09:002016-08-03T22:48:47.560+09:00Google interview: Implement an AVL tree ...Seems to me that google is most likely to ask about Tree inserts over erase. Also they might ask for traversal, infix printing etc etc... basically the cheapest way to do this is just code the whole AVL tree eco system period...<br />
<br />
AVL trees look complex but they basically are a BST tree with an additional pair of rebalancing rotations that you apply to them after an insert or erase.. this makes them somewhat close to the theoretical log(n) depth of a well balanced BST tree... <br />
<br />
There are 2 rotate operations that are depicted as follows. They are mirrored depending on the side you are viewing them from (some people count them as 4, which I think just over-complicates things); personally I know them as inside and outside rotates. You can tell which is which by looking at the position of the node's grandchild.. if it's under the grandparent it's inside, otherwise it's outside. <br />
<br />
<pre>a) grandchild-outside-child (left side)
T1, T2, T3 and T4 are subtrees.
z y
/ \ / \
y T4 Right Rotate (z) x z
/ \ ---------------> / \ / \
x T3 T1 T2 T3 T4
/ \
T1 T2
b) grandchild-inside-child (left-side)
z z x
/ \ / \ / \
y T4 Left Rotate (y) x T4 Right Rotate(z) y z
/ \ --------------> / \ --------------> / \ / \
T1 x y T3 T1 T2 T3 T4
/ \ / \
T2 T3 T1 T2
</pre><br />
The trick to knowing when to perform the rebalance ops is to check the height balance between the node's left and right sides. If the height balance is off by more than +/- 1 you need to rotate, and the sign tells you the direction. Then to determine if it's the inner or outer case you check the balance of the child node.. as you can see from the diagram above, the outer case should already have an imbalance in the *same* direction (sign) as the parent's imbalance; if it does not then you have the inner case and need to adjust it to create the outer case before doing the corrective rotate..<br />
<br />
<pre class="brush:cpp">#include <stdint.h>
#include <vector>
#include <algorithm>
#include <iostream>
#include <functional>
template <typename T>
class AvlTree
{
public:
// late binding via constructor - dont pollute type
typedef std::function<bool(const T&, const T&)> Comparer;
private:
struct Node
{
uint32_t height_;
Node* left_;
Node* right_;
T item_;
Node(const T& item) :
height_(1),
left_(NULL),
right_(NULL),
item_(item)
{}
~Node()
{
if (left_ != NULL) delete left_;
if (right_ != NULL) delete right_;
}
int height() const
{
int leftHeight = (left_ != NULL) ? left_->height_ : 0;
int rightHeight = (right_ != NULL) ? right_->height_ : 0;
return std::max(leftHeight, rightHeight) + 1;
}
int balance() const
{
int leftHeight = (left_ != NULL) ? left_->height_ : 0;
int rightHeight = (right_ != NULL) ? right_->height_ : 0;
return leftHeight - rightHeight;
}
};
public:
class Iterator
{
// infix DFS reshaped into stack for an iterator.. fun but crazy...
//
// Ok.. So i got creative here.. rather than use parent pointers in the
// tree I implemented the iterator as a DFS parent stack.. now you do
// *not* want to do this in production because its actually a lot more
// fragile than the parent pointer solution..
//
// consider the following trade offs
// - the find operation needs to create the stack which is costly and
// iterator returned from the find will often *not* move making it
// needless
// - the iterator is less robust to tree updates. with a parent
// pointer the iterator just points at one node so changing anything that
// is not the current node is generally ok.. However with the stack
// implementation any update that effects the parent chain breaks the
// iterators stack and makes a large issue
// TODO can a bi-directional version be made??...
typedef std::vector<Node*> Stack;
Stack stack_;
void enstackLeft(Node* node);
void next();
public:
Iterator(Node* root); // not publicily usable because Node is not public!
Iterator();
Iterator& operator++();
void operator++(int);
const T& operator*() const;
const T* operator->() const;
T& operator*() ;
T* operator->();
int height() const; // yep mostly for testing
bool operator==(const Iterator& other) const;
bool operator!=(const Iterator& other) const;
};
// Light-weight result of find(): gives access to the located item without the
// cost of building an iterator stack.
class Handle
{
// a light weight *immovable* iterator .. so *not* an iterator
// TODO it *is* possible to lazy covert to a real iterator if ++'ed
// but whatever the stacked based iterator is crazy
Node* node_; // observed node; NULL means "not found"
public:
Handle() :
node_(NULL)
{}
Handle(Node* node) :
node_(node)
{}
// NOTE: dereferencing a NULL (not-found) handle is undefined; test the
// handle with operator bool first.
const T& operator*() const { return node_->item_; }
T& operator*() { return node_->item_; }
const T* operator->() const { return &(node_->item_); }
T* operator->() { return &(node_->item_); }
// true when the handle refers to a node (ie the find succeeded)
operator bool () const { return node_ != NULL; }
};
private:
Node* root_;   // owning pointer to the root node (recursively deleted in ~AvlTree)
Comparer cmp_; // "less than" predicate used for all ordering decisions
// TODO compilation firewalling issues...
// ie can this be converted to a base Node without T and a factory to
// create and handle Nodes with T.
// All helpers take Node*& (a reference to the parent's child link or to
// root_) so they can relink the tree in place.
void rotateLeft (Node*& node);
void rotateRight(Node*& node);
void rebalance (Node*& node);
void insertImpl (const T& item,
Node*& node);
void replacementEraseRight(Node*& node,
Node*& other);
void eraseImpl(const T& item,
Node*& node);
public:
// Default tree: ordered by std::less (requires T to support operator<).
AvlTree() :
root_(NULL),
cmp_(std::less<T>())
{}
// Tree ordered by a caller supplied comparator.
AvlTree(Comparer cmp) :
root_(NULL),
cmp_(cmp)
{}
~AvlTree()
{
if (root_ != NULL) delete root_;
}
void insert(const T& item);
void erase(const T& item);
Handle find(const T& item);
void print(std::ostream& os);
Iterator begin();
Iterator end();
};
// *************************************************
// ******************** ITERATOR *******************
// *************************************************
// Default iterator: empty stack, used as the end() sentinel.
template <typename T>
AvlTree<T>::Iterator::Iterator() :
stack_()
{}
// Iterator positioned at the smallest (left-most) item of the subtree rooted
// at `root`; a NULL root yields the end() sentinel.
template <typename T>
AvlTree<T>::Iterator::Iterator(AvlTree<T>::Node* root) :
stack_()
{
//when we add new nodes we go down the left side and enstack then
enstackLeft(root);
}
// Push `node` and its entire chain of left children onto the stack; the
// deepest (and therefore smallest) node ends up on top, becoming the
// iterator's current position.
template <typename T>
void AvlTree<T>::Iterator::enstackLeft(typename AvlTree<T>::Node* node)
{
    for (Node* cursor = node; cursor != NULL; cursor = cursor->left_)
    {
        stack_.push_back(cursor);
    }
}
// Advance to the in-order successor: the stack top has had its left subtree
// and itself visited, so pop it and descend the left spine of its right
// subtree; if the right subtree is empty the parent becomes the new top.
template <typename T>
void AvlTree<T>::Iterator::next()
{
// node has just completed left side and self visit.. remove me from the stack
Node* current = stack_.back();
stack_.pop_back();
// and enstack add my right branch
enstackLeft(current->right_);
}
// Dereference: the current item (stack top).  Undefined on end().
template <typename T>
const T& AvlTree<T>::Iterator::operator*() const
{
return stack_.back()->item_;
}
template <typename T>
T& AvlTree<T>::Iterator::operator*()
{
return stack_.back()->item_;
}
// Member access through the current item.  Undefined on end().
template <typename T>
const T* AvlTree<T>::Iterator::operator->() const
{
return &(stack_.back()->item_);
}
template <typename T>
T* AvlTree<T>::Iterator::operator->()
{
return &(stack_.back()->item_);
}
// Cached AVL height of the current node (mostly for testing).
template <typename T>
int AvlTree<T>::Iterator::height() const
{
return stack_.back()->height_;
}
// Pre-increment: step to the in-order successor.
template <typename T>
typename AvlTree<T>::Iterator& AvlTree<T>::Iterator::operator++()
{
next();
return *this;
}
// NOTE void on x++ to terminate the post iterator junk
// (a real post-increment would have to copy the whole stack just to return it)
template <typename T>
void AvlTree<T>::Iterator::operator++(int)
{
// Node* prev = stack_.back();
next();
// return ...;
}
// Two iterators are equal when their whole root-to-node paths match; in
// particular an empty stack only equals another empty stack (end()).
template <typename T>
bool AvlTree<T>::Iterator::operator==(const AvlTree<T>::Iterator& other) const
{
// TODO short cut the last item?? would be quicker
return other.stack_ == stack_;
}
template <typename T>
bool AvlTree<T>::Iterator::operator!=(const AvlTree<T>::Iterator& other) const
{
// TODO short cut the last item?? would be quicker
return not (other == *this);
}
// Iterator at the smallest (left-most) item of the tree.
template <typename T>
typename AvlTree<T>::Iterator AvlTree<T>::begin()
{
return Iterator(root_);
}
// One-past-the-end sentinel: an iterator with an empty stack.
template <typename T>
typename AvlTree<T>::Iterator AvlTree<T>::end()
{
return Iterator();
}
// *************************************************
// ********************** FIND *********************
// *************************************************
// Standard BST lookup.  Returns a cheap, non-moving Handle rather than an
// Iterator: building the iterator's DFS stack is costly and would expose
// internals for a result that usually never moves.
// A default (NULL) Handle is returned when the item is absent.
template <typename T>
typename AvlTree<T>::Handle AvlTree<T>::find(const T& item)
{
    // walk down choosing a side with the comparator; equivalence (neither
    // a<b nor b<a) means "found"
    for (Node* cursor = root_; cursor != NULL;)
    {
        if (cmp_(item, cursor->item_))        // item is less: go left
            cursor = cursor->left_;
        else if (cmp_(cursor->item_, item))   // item is greater: go right
            cursor = cursor->right_;
        else
            return Handle(cursor);
    }
    return Handle();
}
// *************************************************
// ********************** PRINT ********************
// *************************************************
// In-order dump of the tree as "item(height) " pairs.
//
// BUG FIX: the loop previously declared the iterator as
// AvlTree<uint32_t>::Iterator, which only compiles when T == uint32_t; it now
// uses this instantiation's own Iterator so print works for every T.
template <typename T>
void AvlTree<T>::print(std::ostream& os)
{
    for (Iterator it = begin();
         it != end();
         ++it)
    {
        os << *it << "(" << it.height() << ")" << " ";
    }
}
// Stream insertion: delegates to print() (in-order "item(height)" pairs).
template <typename T>
std::ostream& operator<<(std::ostream& os, AvlTree<T>& tree)
{
tree.print(os);
return os;
}
// *************************************************
// ******************** REBALANCE ******************
// *************************************************
// Left rotation: promote the node's *right* child to be the new subtree top.
template <typename T>
void AvlTree<T>::rotateLeft(typename AvlTree<T>::Node*& node)
{
// given the current node make its right child the new top
Node* right = node->right_;
if (right == NULL) return; // this will explode.. stop!
node->right_ = right->left_;
right->left_ = node;
// correct height.. *before* adjusting parents link (node)
node->height_ = node->height();
right->height_ = right->height();
// keep in mind that this is using Node*& so the following updates the
// nodes parent left/right or the root!
node = right;
}
// Right rotation: promote the node's *left* child to be the new subtree top.
template <typename T>
void AvlTree<T>::rotateRight(typename AvlTree<T>::Node*& node)
{
// given the current node make its left the top
Node* left = node->left_;
if (left == NULL) return; // this will explode.. stop!
node->left_ = left->right_;
left->right_ = node;
// correct height,,, *before* adjusting parents link (node)
node->height_ = node->height();
left->height_ = left->height();
// keep in mind that this is using Node*& so the following updates the
// nodes parent left/right or the root!
node = left;
}
// attempt to rebalance in a general way for both insert delete..
//
// Recomputes the node's height and, when the balance factor reaches +/-2,
// applies the standard AVL rotations.
//
// BUG FIX: the double-rotation (inner/zig-zag) case previously also fired
// when the heavy child's balance was exactly 0 (`<= 0` / `>= 0`).  A child
// balance of 0 only occurs during deletion and requires a *single* rotation;
// performing the double rotation there can leave a subtree violating the AVL
// invariant.  The tests are now strict (< 0 / > 0).
template <typename T>
void AvlTree<T>::rebalance(typename AvlTree<T>::Node*& node)
{
    if (node == NULL) return;
    // correct node height..
    node->height_ = node->height();
    // and compute AVL rotations
    int balance = node->balance();
    // done if balance is good...
    if (balance > -2 and balance < 2) return;
    Node* child = NULL;
    if (balance < -1)
    {
        // right side is heavy
        child = node->right_;
    }
    else if (balance > 1)
    {
        // left side is heavy
        child = node->left_;
    }
    int childBalance = child->balance();
    // now rotate to fix inbalance
    if (balance > 0)
    {
        // planning to rotate right..
        if (childBalance < 0)
            // the inner (left-right) case: straighten the child first
            rotateLeft(node->left_);
        // left side so rotate right
        rotateRight(node);
    }
    else
    {
        // planning to rotate left..
        if (childBalance > 0)
            // the inner (right-left) case: straighten the child first
            rotateRight(node->right_);
        rotateLeft(node);
    }
}
// *************************************************
// ********************* INSERT ********************
// *************************************************
// Recursive BST insert into the subtree rooted at `node`, followed by an AVL
// rebalance on the unwind.  `node` references the parent's child link (or the
// root pointer) so a new leaf is linked in place.  Items that compare
// equivalent to an existing item are sent down the right branch.
template <typename T>
void AvlTree<T>::insertImpl(const T& item,
                            typename AvlTree<T>::Node*& node)
{
    if (node == NULL)
    {
        // empty slot found: hang the new leaf here
        node = new Node(item);
        return;
    }
    // pick the branch by comparison (equal goes right) and recurse
    Node*& branch = cmp_(item, node->item_) ? node->left_ : node->right_;
    insertImpl(item, branch);
    // now AVL tree rebalance..
    rebalance(node);
}
// Public insert: delegates to the recursive implementation starting at the root.
template <typename T>
void AvlTree<T>::insert(const T& item)
{
insertImpl(item, root_);
}
// *************************************************
// ********************* ERASE *********************
// *************************************************
// Two-child erase helper: walks to the left-most (smallest) node of the
// subtree `other` (initially the erase target's right subtree), swaps that
// replacement's item into the target `node`, then unlinks and deletes the
// replacement.  Both parameters reference parent links so relinking happens
// in place; rebalance runs on each level of the unwind.
template <typename T>
void AvlTree<T>::replacementEraseRight(typename AvlTree<T>::Node*& node,
typename AvlTree<T>::Node*& other)
{
// we are no longer searching for the erase target..
// we are now searching for the next object after it
// the right side min
if (other->left_ != NULL)
{
// go right to the end of the left branching and do replacement erase
replacementEraseRight(node,other->left_);
// then rebalance the tree from here up
rebalance(other);
}
else
{
// swap replacement with target node so nothing odd happens
std::swap(node->item_, other->item_);
// now unlink and erase other from tree
Node* right = other->right_;
other->right_ = NULL; // detach before delete: ~Node recursively deletes children
delete other;
// relink others child in its place
other = right;
}
}
// Recursive BST erase of `item` from the subtree rooted at `node`, with an
// AVL rebalance on the way back up.  `node` references the parent's child
// link (or the root pointer) so relinking updates the tree in place.
// Erasing an absent item is a no-op.
//
// BUG FIX: the zero/one-child test used to read
// (node->right_ == NULL) or (node->right_ == NULL) -- the same side twice --
// which sent the "only a right child" case through the heavier replacement
// path.  It now tests left_ and right_ as intended.
template <typename T>
void AvlTree<T>::eraseImpl(const T& item,
                           typename AvlTree<T>::Node*& node)
{
    // didnt exist in the tree??
    if (node == NULL) return;
    //
    if (cmp_(item, node->item_)) // a < b
    {
        eraseImpl(item, node->left_);
    }
    else if (cmp_(node->item_, item)) // b < a
    {
        eraseImpl(item, node->right_);
    }
    else // b == a
    {
        // match!
        if ((node->left_ == NULL) or (node->right_ == NULL))
        {
            // non special case.. zero/single link can link with parent
            Node* child = (node->right_ == NULL) ? node->left_ : node->right_;
            // unlink node.. and kill it (children detached first: ~Node is recursive)
            node->right_ = NULL;
            node->left_ = NULL;
            delete node;
            // replace with child
            node = child;
        }
        else
        {
            // special case both right and left have stuff.. but we have 1 parent
            // means we have to take a one of the closet (in value) nodes and replace this one
            // we have 2 choices for the replacement the max of left or min of right...
            replacementEraseRight(node, node->right_);
            // TODO.. shouldn't this do a balance check and then go left or right based on the heavy branch??
        }
    }
    // and then AVL tree rebalance..
    rebalance(node);
}
// Public erase: delegates to the recursive implementation starting at the root.
template <typename T>
void AvlTree<T>::erase(const T& item)
{
eraseImpl(item, root_);
}
// *************************************************
// ********************* AVL MAP *******************
// *************************************************
// TODO cleanup AvlTree to AvlMap coverison class
template <typename Key,
typename Value>
class AvlMap
{
typedef std::pair<Key,Value> Pair;
typedef std::function<bool(const Pair&, const Pair&)> Comparer;
typedef AvlTree<Pair> Tree;
AvlTree<Pair> tree_;
public:
typedef typename Tree::Iterator Iterator;
AvlMap() :
tree_([] (const Pair& v1, const Pair& v2) { return v1.first < v2.first; })
{}
Value& operator[](const Key& key)
{
// hmm not such a great implementation...
Pair target(key,Value());
typename AvlTree<Pair>::Handle it = tree_.find(target);
if (it) return it->second;
tree_.insert(target);
return tree_.find(target)->second;
}
Iterator begin() { return tree_.begin(); }
Iterator end() { return tree_.end(); }
};
// *************************************************
// *********************** TEST ********************
// *************************************************
// Smoke test: builds trees, inserts, finds and erases while printing the
// whole tree (items with cached heights) after every mutation so the shape
// can be eyeballed against the expected output.
void test()
{
// TODO honestly this inst real testing.. need to make it check the results..
// empty tree: begin/end/find must not crash
{
AvlTree<uint32_t> tree;
tree.begin();
tree.end();
tree.find(1);
}
// general insert/find/erase sequence exercising rotations on both sides
{
AvlTree<uint32_t> tree;
tree.insert(50); std::cout << tree << "\n";
tree.insert(25); std::cout << tree << "\n";
tree.insert(10); std::cout << tree << "\n";
tree.insert(40); std::cout << tree << "\n";
tree.insert(30); std::cout << tree << "\n";
tree.insert(45); std::cout << tree << "\n";
tree.insert(75); std::cout << tree << "\n";
AvlTree<uint32_t>::Handle handle = tree.find(75);
std::cout << "find:" << *handle << "\n";
tree.erase(50); std::cout << tree << "\n";
tree.erase(25); std::cout << tree << "\n";
tree.erase(10); std::cout << tree << "\n";
tree.erase(40); std::cout << tree << "\n";
tree.erase(30); std::cout << tree << "\n";
tree.erase(45); std::cout << tree << "\n";
tree.erase(75); std::cout << tree << "\n";
}
// outer rotate check
{
AvlTree<uint32_t> tree;
tree.insert(15); std::cout << tree << "\n";
tree.insert(77); std::cout << tree << "\n";
tree.insert(35); std::cout << tree << "\n";
// pair erase check (including erasing an absent item: 1)
tree.erase( 1); std::cout << tree << "\n";
tree.erase(35); std::cout << tree << "\n";
tree.erase(15); std::cout << tree << "\n";
tree.erase(77); std::cout << tree << "\n";
tree.erase( 1); std::cout << tree << "\n";
}
// mirror image of the previous case (rotation on the other side)
{
AvlTree<uint32_t> tree;
tree.insert(93); std::cout << tree << "\n";
tree.insert(86); std::cout << tree << "\n";
tree.insert(87); std::cout << tree << "\n";
// pair erase check (including erasing an absent item: 99)
tree.erase(99); std::cout << tree << "\n";
tree.erase(86); std::cout << tree << "\n";
tree.erase(87); std::cout << tree << "\n";
tree.erase(93); std::cout << tree << "\n";
tree.erase(99); std::cout << tree << "\n";
}
}
// Randomized test: insert ~4k random values (many duplicates, 0..99) and then
// print how many nodes sit at each cached height, giving a rough check that
// the tree stays balanced.
void randomTreeTest()
{
AvlTree<uint32_t> tree;
for (int i = 0; i < (4096-1); ++i)
{
uint32_t value = std::rand() % 100;
// std::cout << value << " ";
tree.insert(value);
}
std::cout << "\n";
// frequency count the nodes at each layer..
// thought it was ironic to use AvlTree as the map also.. just to prove that its flexible
AvlMap<int,int> freq;
for (AvlTree<uint32_t>::Iterator it = tree.begin();
it != tree.end();
++it)
{
++(freq[it.height()]);
}
std::cout << "\n";
// dump it
for (AvlMap<int,int>::Iterator it = freq.begin();
it != freq.end();
++it)
{
std::cout << it->first << ":" << it->second << " ";
}
std::cout << "\n";
}
// Run the hand-built smoke tests then the randomized height-distribution test.
int main()
{
test();
randomTreeTest();
}
</pre><br />
<br />
And the output looks like:<br />
<pre>50(1)
25(1) 50(2)
10(1) 25(2) 50(1)
10(1) 25(3) 40(1) 50(2)
10(1) 25(3) 30(1) 40(2) 50(1)
10(1) 25(2) 30(1) 40(3) 45(1) 50(2)
10(1) 25(2) 30(1) 40(3) 45(1) 50(2) 75(1)
find:75
10(1) 25(2) 30(1) 40(3) 45(1) 75(2)
10(1) 30(2) 40(3) 45(1) 75(2)
30(1) 40(3) 45(1) 75(2)
30(1) 45(2) 75(1)
45(2) 75(1)
75(1)
15(1)
15(2) 77(1)
15(1) 35(2) 77(1)
15(1) 35(2) 77(1)
15(1) 77(2)
77(1)
93(1)
86(1) 93(2)
86(1) 87(2) 93(1)
86(1) 87(2) 93(1)
87(2) 93(1)
93(1)
1:1992 2:995 3:518 4:266 5:129 6:90 7:45 8:26 9:16 10:8 11:5 12:2 13:2 14:1
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-56543849434439476072016-07-25T23:31:00.002+09:002016-07-26T21:57:02.281+09:00google interview: compute the mininual coins - asymptotic constant versionI have cracked the asymptotically constant version of the min coin counts algorithm .. <br />
<br />
<pre class="brush:cpp">// http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer
//
// Given a set of coin denominators, find the minimum number of coins to give a
// certain amount of change.
#include <cstdint>
#include <climits>
#include <vector>
#include <iostream>
#include <functional>
#include <chrono>
#include <memory>
#include <list>
#include <queue>
// I knew there was an asymptotically constant version of algorithm i just cracked
// it using top down dynamic programming combined squeeze theorem (the same way
// that Archimedes found pi)
//
// https://en.wikipedia.org/wiki/Squeeze_theorem
// https://betterexplained.com/articles/prehistoric-calculus-discovering-pi/
// how it works:
// First off we are going to search with a dynamic programming version of the
// depth first search. We are searching by choosing a count for a coin
// denomination and then moving to the next denomination searching all the way
// down this branch until we exhaust it. Since the algorithm is a dynamic
// version of DFS it means we are using a stack, not recursion.
// Also note that we are using squeeze theorem that means we need to track an
// upperBoound(the know best) and a lowerBound the theoretical floor that an
// unknown branch might achieve
// So by using DFS we can get down to the last coin denomination and get a
// solid result quickly and that potentially tightens the upperBound more. As
// the upperbound tightens we are able to trim the search tree more, and as a
// result speed up the overall search more.
// 1) first we set the best and the initial upperBounds to the max number of
// coins possible + 1. The upperbounds is the same as our best known min coins
// count. which at the start inst anything. Remember when the upperBounds on
// coin count is exceed the search will stop looking at that branch as a
// possible answer
// 2) Next initialize the stack. We only stack partial computation steps.
// The first item to add is the max coins we could have for the largest coin,
// later we will decrement from this until we reach zero
//
// 3) Now start the iteration loop. Pop off the current working item (note the
// actual algorithm is optimized and only pops from the stack if it really has
// finished that denomination). There are 2 updates to make to the stack
//
// 3a) make a copy of the current item and reduce the coin count in it for the
// current denomination then compute the theoretical lowerBound for the
// remaining largest denomination of coins (ie compute the max coins for
// highest remaining denomination excluding current denomination and better), if
// the theoretical lowerBound of coins that have to be used plus the current
// used coins is greater than the upperBound prune the entire branch.. it can
// not possibly bet the current known min. But if it is better than or equal
// we should push that back onto the stack as it may bet the current best
//
// 3b) Then make another copy of the current item as the next item. moving to
// the next highest coin denomination (excluding to ones which we have already
// chosen coins for) and compute the max coins that it can use and add it to a
// next item. Then check if its a terminal case (all coins used) if it inst
// terminal push that to the stack (it will pop off the stack as the next
// current) if it is the terminal then compare it to the upperBound if it can
// bet that update the best min and the upperBound
//
// repeat until the stack runs out.. then return the best min,
// So lets get started note i have the greedy and bot up with reach pruning
// versions for comparison
// A greedy implementation - a quick greedy way to the answer is to maximize
// the largest coin
//
// Note this can give the *wrong* (non-minimal) answer
// Note expects reverse sorted coins (largest to smallest in denom)
//
// Returns denom.size()+1 entries: per-denomination counts followed by the
// total coin count; the total is INT_MAX when no exact change is possible.
std::vector<uint32_t> minCoins_greedy(uint32_t total,
                                      const std::vector<uint32_t>& denom)
{
    std::vector<uint32_t> res(denom.size()+1,0);
    if (total == 0) return res;
    // size_t index: avoids the signed/unsigned comparison with denom.size()
    for (std::size_t i = 0; i < denom.size(); ++i)
    {
        uint32_t count = total / denom[i];
        total -= (count * denom[i]);
        res[i] = count;
        res[denom.size()] += count;
    }
    if (total != 0) res[denom.size()] = INT_MAX; // leftover: not representable
    return res;
}
// ok so we can improve the bottom up DP version and shave a bit more time by
// visiting items in the order of current coin count as a result we will create
// a BFS style pruned search
//
// Note expects reverse sorted coins (largest to smallest in denom)
//
// Each cache row holds the per-denomination counts plus the total coin count
// for the first (shortest) way a sub-total was reached; a total count of 0
// marks "unvisited".  The FIFO makes this a breadth-first search by coin
// count, so the first time `total` is reached the count is minimal.
//
// PERF FIX: the FIFO was a std::list (one heap node per element); it is now a
// std::queue (deque-backed, already included via <queue>).  Pop/push order is
// identical so results are unchanged.
std::vector<uint32_t> minCoins_botUpReach(uint32_t total,
                                          const std::vector<uint32_t>& denom)
{
    // we are dividing by making the full decistion for each coin and then removing it from the list
    if (total == 0) return std::vector<uint32_t>(denom.size()+1, 0);
    // setup memory
    std::vector< std::vector<uint32_t> > cache(total + 1, std::vector<uint32_t>(denom.size()+1, 0));
    std::queue<uint32_t> pending;
    pending.push(0);
    while (not pending.empty())
    {
        uint32_t curTotal = pending.front();
        pending.pop();
        for (std::size_t i = 0; i < denom.size(); ++i)
        {
            uint32_t newTotal = curTotal + denom[i];
            if (newTotal <= total and
                cache[newTotal][denom.size()] == 0)
            {
                // first (and therefore shortest) path to newTotal
                cache[newTotal] = cache[curTotal];
                ++(cache[newTotal][i]);
                ++(cache[newTotal][denom.size()]);
                if (newTotal == total)
                    return cache[total];
                pending.push(newTotal);
            }
        }
    }
    // impossible case
    cache[0][denom.size()] = INT_MAX;
    return cache[0];
}
// ok another idea: reformulate as top down upperBound/lowerBound squeeze tree
// pruning (see the long explanation at the top of the file).
//
// Note expects reverse sorted coins (largest to smallest in denom)
//
// Returns denom.size()+3 entries; callers should only read [0, denom.size()]:
// per-denomination counts then the total coin count (INT_MAX when no exact
// change is possible).  The trailing entries are internal working fields
// (remaining total and working denomination index).
std::vector<uint32_t> minCoins_topSqueeze(uint32_t total,
                                          const std::vector<uint32_t>& denom)
{
    const int LastIdx = denom.size() - 1;
    const int TotalCoinsIdx = denom.size();
    const int RemTotalIdx = denom.size() + 1;
    const int WorkingIdx = denom.size() + 2;
    const int Size = denom.size() + 3;
    // prune the total of 0 corner case
    if (total == 0) return std::vector<uint32_t>(denom.size()+1, 0);
    std::vector<uint32_t> best(Size);
    best[TotalCoinsIdx] = INT_MAX;
    // remove 1 coin corner case
    if (denom.size() < 2)
    {
        if (denom.size() == 1)
        {
            best[0] = total / denom[0];
            // BUG FIX: the exact-change test must *multiply* the coin count by
            // the denomination; it previously added, so e.g. total=6 with
            // denom={2} was wrongly reported as impossible.
            best[TotalCoinsIdx] = (best[0] * denom[0]) == total ? best[0] : INT_MAX;
        }
        return best;
    }
    // whats my best guess min (upperbounds)
    // dont use INT_MAX we are doing maths on it (make it overflowed max)
    uint32_t upperBounds = total + 1;
    // since we move through the denom vector its size is also the upper limit to
    // the stack size
    std::vector< std::vector<uint32_t> > stack(denom.size(), std::vector<uint32_t>(Size));
    int stackTopIdx = 0;
    // compute the max coins for the first layer thats the starting point
    stack[0][0] = total / denom[0];
    stack[0][TotalCoinsIdx] = stack[0][0]; // total coin count
    stack[0][RemTotalIdx] = total - (stack[0][0]*denom[0]); // remaining total
    stack[0][WorkingIdx] = 0; // denom working offset
    while (stackTopIdx >= 0)
    {
        if (stackTopIdx >= stack.size()) std::cout << "Stack size assumption failed!\n";
        std::vector<uint32_t>& current = stack[stackTopIdx];
        uint32_t workingIdx = current[WorkingIdx];
        // first generate the current coins reduction case for this level
        if (current[workingIdx] > 0) // we have coin left in this layer
        {
            // compute is the absolute lower bonds the next coin level..
            uint32_t nextCoinsLowerBounds
                = current[RemTotalIdx] / denom[workingIdx+1];
            // can this new count and the lower bounds of the next level of coins
            // possibly bet the curent upper bounds?
            if (current[TotalCoinsIdx]-1 + nextCoinsLowerBounds <= upperBounds)
            {
                // update the current top but first push a copy ahead of it
                // for the next level of coins computation.
                stack[stackTopIdx+1] = current;
                // ok so remove one of current levels coins
                current[workingIdx] -= 1;
                current[TotalCoinsIdx] -= 1;
                current[RemTotalIdx] += denom[workingIdx];
                // move stack forward
                ++stackTopIdx;
            }
        }
        // now generate the max case for the next level
        ++workingIdx;
        std::vector<uint32_t>& next = stack[stackTopIdx];
        // compute the max coins we can use for it and queue that
        next[workingIdx] = next[RemTotalIdx] / denom[workingIdx];
        next[TotalCoinsIdx] += next[workingIdx];
        next[RemTotalIdx] -= denom[workingIdx] * next[workingIdx];
        next[WorkingIdx] = workingIdx;
        // check if this result is a terminal of the search
        if (next[RemTotalIdx] == 0)
        {
            // it was the end and solves the problem
            // remove it from the stack
            --stackTopIdx;
            // check if it bets the best
            if (upperBounds > next[TotalCoinsIdx])
            {
                best = next;
                upperBounds = best[TotalCoinsIdx];
            }
        }
        else if(workingIdx == LastIdx) // because it can fail!
        {
            // its on the last coin and didnt correctly match totals. So its a
            // broken version. Remove it.
            --stackTopIdx;
        }
    }
    return best;
}
// Harness: run one min-coins implementation, print its per-denomination
// result, total count and wall-clock time in nanoseconds.
void test(const char* label,
std::function<std::vector<uint32_t> (uint32_t, const std::vector<uint32_t>&)> func,
uint32_t n,
const std::vector<uint32_t>& denom)
{
auto start = std::chrono::system_clock::now();
std::vector<uint32_t> res = func(n, denom);
auto end = std::chrono::system_clock::now();
std::cout << label << ": result:";
for (int i = 0; i < denom.size(); ++i)
std::cout << denom[i] << "x" << res[i] << " ";
std::cout << " count:" << res[denom.size()] << "\n";
std::cout << " -- time:"
<< std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
<< " nSec\n";
}
// Run both implementations on one (total, denominations) pair; the bottom-up
// version is skipped for large totals where its O(total) cache makes it slow.
void testSet(uint32_t n,
const std::initializer_list<uint32_t>& dlist)
{
std::vector<uint32_t> denom(dlist);
std::cout << "\n*** test set for " << n << " *** \n";
if (n < 2001) test("botUpReach", minCoins_botUpReach ,n, denom);
else std::cout << "skipped botUpReach\n";
test("topSqueeze", minCoins_topSqueeze, n, denom);
}
// Benchmark/regression driver: edge cases, greedy-attacking totals, and very
// large totals that only the squeeze version handles in reasonable time.
int main()
{
// breakers
testSet(0, {20,15,5,4,1});
testSet(1, {2});
testSet(23, {20,15,5,4,1}); // attack greedy
testSet(31, {20,15,5,4,1}); // attack greedy
testSet(43, {20,15,5,4,1}); // attack greedy
testSet(46, {20,15,5,4,1}); // attack greedy
testSet(100, {20,15,5,4,1}); // attack bottom up
testSet(1000, {20,15,5,4,1});
testSet(1000, {13,7,1});
testSet(2000, {20,15,5,4,1});
testSet(10000, {20,15,5,4,1});
testSet(100000000, {20,15,5,4,1});
testSet(100000023, {20,15,5,4,1});
testSet(100000031, {20,15,5,4,1});
testSet(100000043, {20,15,5,4,1});
testSet(100000046, {20,15,5,4,1});
testSet(100000100, {20,15,5,4,1});
}
</pre><br />
<br />
<pre>*** test set for 0 ***
botUpReach: result:20x0 15x0 5x0 4x0 1x0 count:0
-- time:4100 nSec
topSqueeze: result:20x0 15x0 5x0 4x0 1x0 count:0
-- time:1614 nSec
*** test set for 1 ***
botUpReach: result:2x0 count:2147483647
-- time:14201 nSec
topSqueeze: result:2x0 count:2147483647
-- time:2656 nSec
*** test set for 23 ***
botUpReach: result:20x0 15x1 5x0 4x2 1x0 count:3
-- time:49285 nSec
topSqueeze: result:20x0 15x1 5x0 4x2 1x0 count:3
-- time:17680 nSec
*** test set for 31 ***
botUpReach: result:20x0 15x2 5x0 4x0 1x1 count:3
-- time:60896 nSec
topSqueeze: result:20x0 15x2 5x0 4x0 1x1 count:3
-- time:21468 nSec
*** test set for 43 ***
botUpReach: result:20x1 15x1 5x0 4x2 1x0 count:4
-- time:96826 nSec
topSqueeze: result:20x1 15x1 5x0 4x2 1x0 count:4
-- time:21117 nSec
*** test set for 46 ***
botUpReach: result:20x2 15x0 5x1 4x0 1x1 count:4
-- time:99908 nSec
topSqueeze: result:20x2 15x0 5x1 4x0 1x1 count:4
-- time:26414 nSec
*** test set for 100 ***
botUpReach: result:20x5 15x0 5x0 4x0 1x0 count:5
-- time:187759 nSec
topSqueeze: result:20x5 15x0 5x0 4x0 1x0 count:5
-- time:15795 nSec
*** test set for 1000 ***
botUpReach: result:20x50 15x0 5x0 4x0 1x0 count:50
-- time:2350739 nSec
topSqueeze: result:20x50 15x0 5x0 4x0 1x0 count:50
-- time:18175 nSec
*** test set for 1000 ***
botUpReach: result:13x76 7x1 1x5 count:82
-- time:2124334 nSec
topSqueeze: result:13x76 7x1 1x5 count:82
-- time:19523 nSec
*** test set for 2000 ***
botUpReach: result:20x100 15x0 5x0 4x0 1x0 count:100
-- time:4793008 nSec
topSqueeze: result:20x100 15x0 5x0 4x0 1x0 count:100
-- time:21261 nSec
*** test set for 10000 ***
skipped botUpReach
topSqueeze: result:20x500 15x0 5x0 4x0 1x0 count:500
-- time:22749 nSec
*** test set for 100000000 ***
skipped botUpReach
topSqueeze: result:20x5000000 15x0 5x0 4x0 1x0 count:5000000
-- time:23337 nSec
*** test set for 100000023 ***
skipped botUpReach
topSqueeze: result:20x5000000 15x1 5x0 4x2 1x0 count:5000003
-- time:65017 nSec
*** test set for 100000031 ***
skipped botUpReach
topSqueeze: result:20x5000000 15x2 5x0 4x0 1x1 count:5000003
-- time:50749 nSec
*** test set for 100000043 ***
skipped botUpReach
topSqueeze: result:20x5000001 15x1 5x0 4x2 1x0 count:5000004
-- time:42286 nSec
*** test set for 100000046 ***
skipped botUpReach
topSqueeze: result:20x5000002 15x0 5x1 4x0 1x1 count:5000004
-- time:44130 nSec
*** test set for 100000100 ***
skipped botUpReach
topSqueeze: result:20x5000005 15x0 5x0 4x0 1x0 count:5000005
-- time:14530 nSec
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-69431407452539639532016-07-24T17:47:00.000+09:002016-07-25T23:35:15.721+09:00google interview: compute the minual coins Over all the result of this one is workable but I expect that there should be a way equal the greedy's performance in the asymptotic, because the fine details of least significant numbers have stop changing once you get far enough out of range of the largest coin.. hence it seems very plausible there is is a better algorithm larger numbers... I just dont have the time to work it out, maybe something that greedy guesses the largest coin value to some limit and then does the real search for the rest..<br />
<br />
Update: I have just found some more time and added one more improvement that cuts off more time (bottom-up restricted to reachable totals only). I had another improvement in mind (reachable + an A*-style distance-to-end estimate via the max usable coin), BUT this had 2 issues: the A* estimate is greedy and so not perfect, and the use of a priority queue added significant overhead, destroying all benefit of it anyway. <br />
<br />
Update 2: I cracked the asymptotically constant version — it kicks these out of the room. Check out http://code-slim-jim.blogspot.com/2016/07/google-interview-compute-mininual-coins.html (next post after this one)<br />
<br />
<br />
<pre class="brush:cpp">// http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer
//
// Given a set of coin denominators, find the minimum number of coins to give a
// certain amount of change.
#include <cstdint>
#include <climits>
#include <vector>
#include <iostream>
#include <functional>
#include <chrono>
#include <memory>
#include <list>
// This question is quite interesting, even though it simple to ask the range
// of potential solutions is wide as far as i see it you have 2 main choices:
// 1) you can treat it as a graph and perform BFS, astar, etc to search on it
// 2) you can treat it as a recursion and dynamic programming question
// Given that this can form a tree you can also use greedy search to find a
// solution. BUT since you are searching for the *minimal* length the result
// from greedy is likely to be sub optimal.
// Also note that we can have multiple solutions to the problem depending on
// value and denominations 6 => 1+5 = 2+4 = ... etc
// Its also possible to have no solutions (for example exclude 1 from the denominations)
// So lets get started
// A greedy implementation - commit to as many of the largest coin as
// possible, then repeat with each smaller denomination in turn.
//
// Note this can give the *wrong* answer
// Note expects reverse sorted coins (largest to smallest)
std::vector<uint32_t> minCoins_greedy(uint32_t total,
                                      const std::vector<uint32_t>& denom)
{
    const std::size_t countIdx = denom.size(); // last slot holds the total count
    std::vector<uint32_t> result(countIdx + 1, 0);
    if (total == 0) return result;
    uint32_t remaining = total;
    for (std::size_t i = 0; i < denom.size(); ++i)
    {
        uint32_t used = remaining / denom[i];
        remaining -= used * denom[i];
        result[i] = used;
        result[countIdx] += used;
    }
    // anything left over means greedy could not make exact change
    if (remaining != 0) result[countIdx] = INT_MAX;
    return result;
}
// The recursive implementation - exhaustively walk the whole decision tree.
// Induction: v[n] = 1 + v[n - c] for each denomination c that still fits.
// Keeps the first strictly-better branch found (scan order: largest coin
// first), so ties keep the earlier result.
//
// Note denoms need to be in reverse order.. max to min!
std::vector<uint32_t> minCoins_naive(uint32_t total,
                                     const std::vector<uint32_t>& denom)
{
    const std::size_t countIdx = denom.size();
    std::vector<uint32_t> best(countIdx + 1, 0);
    if (total == 0) return best;
    best[countIdx] = INT_MAX; // nothing found yet
    for (std::size_t i = 0; i < denom.size(); ++i)
    {
        if (total < denom[i]) continue; // coin too big for what's left
        std::vector<uint32_t> sub = minCoins_naive(total - denom[i], denom);
        // a failed branch has count INT_MAX; +1 wraps past INT_MAX in
        // uint32_t so it can never win this comparison
        if ((sub[countIdx] + 1) < best[countIdx])
        {
            best = sub;
            ++(best[i]);
            ++(best[countIdx]);
        }
    }
    return best;
}
// Now the naive recursion is also computing items that we know are already
// larger than our current known best min hence we can the prune tree here -
// simply walk the entire tree until we reach a depth greater than the current
// best min then stop because it cant be down there anymore
//
// Note denoms need to be in reverse order.. max to min!
std::vector<uint32_t> minCoins_prunedImpl(uint32_t total,
const std::vector<uint32_t>& denom,
uint32_t depth,
uint32_t maxDepth)
{
// found trim point..
if (depth > maxDepth)
{
// terminate this search path
std::vector<uint32_t> res(denom.size()+1,0);
res[denom.size()] = INT_MAX;
return res;
}
// compute the greedy answer..
// (it is this branch's initial candidate and also tightens maxDepth below)
std::vector<uint32_t> res = minCoins_greedy(total,denom);
// found loop termination..
if (res[denom.size()] == 1)
return res;
for(int i = 0; i < denom.size(); ++i)
{
if (total >= denom[i])
{
// deepest useful depth: the smaller of the caller's budget and
// depth + the greedy count for this branch
uint32_t newMaxDepth = (maxDepth < depth + res[denom.size()] ) ? maxDepth : (depth + res[denom.size()] );
std::vector<uint32_t> pass = minCoins_prunedImpl(total - denom[i],
denom,
depth+1,
newMaxDepth );
// a failed branch has count INT_MAX; the uint32_t +1 wraps past
// INT_MAX so a failure can never win this comparison
if ((pass[denom.size()] + 1) < res[denom.size()])
{
res = pass;
++(res[i]);
++(res[denom.size()]);
}
}
}
return res;
}
// Public entry point for the pruned recursive search: start at depth zero
// with an effectively unlimited depth budget.
std::vector<uint32_t> minCoins_pruned(uint32_t total,
                                      const std::vector<uint32_t>& denom)
{
    return minCoins_prunedImpl(total, denom, /*depth=*/0, /*maxDepth=*/INT_MAX);
}
// Ok so we know now that the naive isn't smart, and pruning improves this but
// the approach is still wasting allot of time... what if approach it as a
// divide and conquer problem
//
// To achieve this we first reformulate the induction slightly
// v[n] = 1 + v[n - c]
// into
// v[n] = v[c] + v[n - c]
//
// now we have moved from 1 coin at a time on the left and many on the right to
// a full divide and conquer with many coins on both left and right. Since we are
// now choosing multiple coins at one time we may as well pick all the coins from
// each denomination and then remove it from the list.
//
// Note denoms need to be in reverse order.. max to min!
// Divide-and-conquer minimum-coin search: decide how many coins of the first
// denomination to take (every possibility), then solve the remainder using
// only the remaining denominations.
//
// Result layout: slot i is the count for denomination i; the extra final slot
// is the total number of coins (INT_MAX when no exact decomposition exists).
//
// Note denoms need to be in reverse order.. max to min!
std::vector<uint32_t> minCoins_divAndConqImpl(uint32_t total,
                                              const uint32_t* denomStart,
                                              const uint32_t* denomEnd)
{
    const std::size_t numDenoms = denomEnd - denomStart;
    const uint32_t firstDenom = *denomStart;
    const uint32_t maxTake = total / firstDenom;
    std::vector<uint32_t> best(numDenoms + 1, 0);
    best[numDenoms] = INT_MAX;
    if (numDenoms == 1)
    {
        // Base case: one denomination left, so an answer exists only when it
        // divides the remaining total exactly.
        if (total == maxTake * firstDenom)
        {
            best[0] = maxTake;
            best[1] = maxTake;
        }
        return best;
    }
    uint32_t take = maxTake + 1;
    // Special-case an exact fit using only the first denomination; record it
    // here and skip that count in the loop below.
    if (total == maxTake * firstDenom)
    {
        best[0] = maxTake;
        best[numDenoms] = maxTake;
        --take;
    }
    while (take > 0)
    {
        --take;
        const uint32_t remainder = total - take * firstDenom;
        std::vector<uint32_t> sub = minCoins_divAndConqImpl(remainder,
                                                            denomStart + 1,
                                                            denomEnd);
        const uint32_t subCount = sub[numDenoms - 1];
        if (subCount != INT_MAX and (subCount + take) < best[numDenoms])
        {
            best[0] = take;
            // Shift the sub-result up one slot; its final entry (the count)
            // lands in our count slot, then our own coins are added on top.
            for (std::size_t i = 0; i < numDenoms; ++i)
                best[i + 1] = sub[i];
            best[numDenoms] += take;
        }
    }
    return best;
}
// Entry point for the divide-and-conquer solver: hand the implementation the
// denomination list as a [begin, end) pointer range.
// Note denoms need to be in reverse order.. max to min!
std::vector<uint32_t> minCoins_divAndConq(uint32_t total,
                                          const std::vector<uint32_t>& denom)
{
    // Use data() to form the range; unlike the previous
    // &denom[denom.size()-1] + 1 formulation this is well defined even when
    // the vector is empty (indexing size()-1 on an empty vector is UB).
    const uint32_t* denomStart = denom.data();
    const uint32_t* denomEnd = denom.data() + denom.size();
    return minCoins_divAndConqImpl(total,
                                   denomStart,
                                   denomEnd);
}
// ok so the divide and conquer approach makes a big difference.. but what if we stop
// looking at this as a recursive problem and look at it from a dynamic
// programming perspective, we will start with the top down approach and simply
// cache the results from the prior recursive calls
//
// Note denoms need to be in reverse order.. max to min!
// Top-down (memoised) minimum-coin search. cache[t] stores the result vector
// for total t; an entry whose final slot (the coin count) is 0 or 1 is
// treated as not-yet-final and (re)computed, while INT_MAX marks an entry
// that is in progress or unreachable. Returns a reference into the cache.
// Note denoms need to be in reverse order.. max to min!
std::vector<uint32_t>& minCoins_topImpl(uint32_t total,
                                        const std::vector<uint32_t>& denom,
                                        std::vector<std::vector<uint32_t> >& cache)
{
    const std::size_t countSlot = denom.size();
    std::vector<uint32_t>& entry = cache[total];
    // Memo hit (count above 1 means already computed) or the trivial zero
    // total, which is its own all-zero answer.
    if (total == 0 or entry[countSlot] > 1)
    {
        return entry;
    }
    entry[countSlot] = INT_MAX; // mark as in-progress / no answer yet
    for (std::size_t i = 0; i < denom.size(); ++i)
    {
        if (denom[i] <= total)
        {
            std::vector<uint32_t>& sub = minCoins_topImpl(total - denom[i],
                                                          denom,
                                                          cache);
            if ((sub[countSlot] + 1) < entry[countSlot])
            {
                entry = sub;           // copy the sub-result into our slot...
                ++(entry[i]);          // ...then add one coin of denom[i]
                ++(entry[countSlot]);
            }
        }
    }
    return entry;
}
// Public entry for the top-down memoised solver: allocate the cache (one
// result vector per total in [0, total]) and run the recursion.
// Note denoms need to be in reverse order.. max to min!
std::vector<uint32_t> minCoins_top(uint32_t total,
                                   const std::vector<uint32_t>& denom)
{
    // Build the fully sized, zero-initialised cache in one construction
    // instead of a resize loop; this also removes the signed int vs
    // unsigned total+1 loop comparison of the original.
    std::vector<std::vector<uint32_t> > cache(total + 1,
                                              std::vector<uint32_t>(denom.size() + 1, 0));
    return minCoins_topImpl(total, denom, cache);
}
// ok but as always we can reverse the approach and produce a bottom-up
// dynamic programming implementation and save our selves all the call overheads
// since in this problem the tree search is not sparse and every case will be
// visited
//
// Note denoms need to be in reverse order.. max to min!
// Bottom-up dynamic-programming minimum-coin solve: fill the table from
// target 1 up to `total`, deriving each entry from the already finalised
// entries below it. cache must arrive with total+1 zeroed vectors of
// denom.size()+1 slots; the last slot of each entry is the coin count
// (INT_MAX = unreachable).
// Note denoms need to be in reverse order.. max to min!
std::vector<uint32_t> minCoins_botImpl(uint32_t total,
                                       const std::vector<uint32_t>& denom,
                                       std::vector<std::vector<uint32_t> >& cache)
{
    const std::size_t countSlot = denom.size();
    for (uint32_t target = 1; target <= total; ++target)
    {
        std::vector<uint32_t>& entry = cache[target];
        entry[countSlot] = INT_MAX; // assume unreachable until proven otherwise
        for (std::size_t i = 0; i < denom.size(); ++i)
        {
            if (denom[i] > target)
                continue;
            const uint32_t below = target - denom[i];
            if (cache[below][countSlot] + 1 < entry[countSlot])
            {
                entry = cache[below];  // copy the best smaller solution...
                ++(entry[i]);          // ...and add one coin of denom[i]
                ++(entry[countSlot]);
            }
        }
    }
    return cache[total];
}
// Public entry for the bottom-up DP solver: allocate the cache (one result
// vector per total in [0, total]) and run the table fill.
// Note denoms need to be in reverse order.. max to min!
std::vector<uint32_t> minCoins_bot(uint32_t total,
                                   const std::vector<uint32_t>& denom)
{
    // Build the fully sized, zero-initialised cache in one construction
    // instead of a resize loop; this also removes the signed int vs
    // unsigned total+1 loop comparison of the original.
    std::vector<std::vector<uint32_t> > cache(total + 1,
                                              std::vector<uint32_t>(denom.size() + 1, 0));
    return minCoins_botImpl(total,
                            denom,
                            cache);
}
// ok so we can improve the bottom up DP version and shave a bit more time by
// visiting items in the order of current coin count as a result we will create
// only the counts that reach towards the
//
// Breadth-first bottom-up solve: expand totals outward from 0 in order of
// coin count, so the first time a total is reached it is reached with the
// fewest possible coins. Stops the moment the requested total is produced.
// Result layout matches the other solvers: one slot per denomination plus a
// final coin-count slot (INT_MAX when the total is unreachable).
std::vector<uint32_t> minCoins_botUpReach(uint32_t total,
                                          const std::vector<uint32_t>& denom)
{
    const std::size_t countSlot = denom.size();
    if (total == 0)
        return std::vector<uint32_t>(countSlot + 1, 0);
    // one zeroed result slot per total; count slot 0 doubles as "unvisited"
    std::vector< std::vector<uint32_t> > table(total + 1,
                                               std::vector<uint32_t>(countSlot + 1, 0));
    std::list<uint32_t> frontier;
    frontier.push_back(0);
    while (not frontier.empty())
    {
        const uint32_t from = frontier.front();
        frontier.pop_front();
        for (std::size_t i = 0; i < denom.size(); ++i)
        {
            const uint32_t reached = from + denom[i];
            // only take in-range totals never reached before
            if (reached > total or table[reached][countSlot] != 0)
                continue;
            table[reached] = table[from];
            ++(table[reached][i]);
            ++(table[reached][countSlot]);
            if (reached == total)
                return table[total];
            frontier.push_back(reached);
        }
    }
    // the frontier drained without hitting `total`: mark it unreachable
    table[0][countSlot] = INT_MAX;
    return table[0];
}
// over all the results are:
// * the greedy continues to kick butt but can be wrong.. this was expected as it
// guess and looks at 1 path to the solution
// * recursive and depth pruned implementation work but are awful because they
// repeat the same work over and over
// * the divide and conquer performs well and beats the dynamic programming
// implementations when the target n is small... this was a surprising result to
// me as the top down implementation effectively tries to exhaust the largest
// coins first, and does this by calling the cache value mid point.. hence i
// expected it to beat the divide and conquer, because it does effectively
// the same with caching
// * the dynamic programming versions work best in the long term they:
// + are pruning because they use the cache results to effect early termination
// + effectively implement the divide and conquer version by taking largest
// coin first and it should be faster than the div/conq because it can use the
// the cached result mid point and can trim the computations of multiple coins draws
// with the same value (hence its pointless to DP covert the div/conq version)
// + The bottom up DP also optimizes further by removing the function call overhead
// Time one solver over a single (n, denom) problem and print its label,
// per-denomination result breakdown, coin count, and wall-clock duration.
void test(const char* label,
          std::function<std::vector<uint32_t> (uint32_t, const std::vector<uint32_t>&)> func,
          uint32_t n,
          const std::vector<uint32_t>& denom)
{
    const auto begin = std::chrono::system_clock::now();
    const std::vector<uint32_t> res = func(n, denom);
    const auto finish = std::chrono::system_clock::now();
    const auto nanos =
        std::chrono::duration_cast<std::chrono::nanoseconds>(finish - begin).count();
    std::cout << label << ": result:";
    for (std::size_t i = 0; i < denom.size(); ++i)
        std::cout << denom[i] << "x" << res[i] << " ";
    std::cout << " count:" << res[denom.size()] << "\n";
    std::cout << " -- time:" << nanos << " nSec\n";
}
// Run every solver against one (n, denomination-list) problem and print each
// result with its timing. The asymptotically bad solvers are skipped once n
// grows past the point where they finish in reasonable time.
// Note dlist must be ordered max to min, as all the solvers require.
void testSet(uint32_t n,
             const std::initializer_list<uint32_t>& dlist)
{
    std::vector<uint32_t> denom(dlist);
    std::cout << "\n*** test set for " << n << " *** \n";
    test("greedy ", minCoins_greedy,n, denom);
    // exponential search - only viable for tiny n
    if (n < 30) test("naive ", minCoins_naive ,n, denom);
    else std::cout << "skipping naive\n";
    // pruned search - better, but still explodes for moderate n
    if (n < 200) test("pruned ", minCoins_pruned,n, denom);
    else std::cout << "skipping pruned\n";
    // divide and conquer - fine until n reaches the thousands
    if (n < 2000) test("divAndConq", minCoins_divAndConq,n, denom);
    else std::cout << "skipping div and conqurer\n";
    // the dynamic-programming solvers handle every size tested
    test("top ", minCoins_top ,n, denom);
    test("bot ", minCoins_bot ,n, denom);
    test("botUpReach", minCoins_botUpReach ,n, denom);
}
// Exercise the solvers over edge cases, greedy-defeating totals, and
// progressively larger n to expose each implementation's scaling limits.
int main()
{
    // breakers
    testSet(0, {20,15,5,4,1});   // zero total - everything returns count 0
    testSet(1, {2});             // unreachable total - count INT_MAX
    testSet(23, {20,15,5,4,1}); // attack greedy
    testSet(46, {20,15,5,4,1}); // attack greedy
    testSet(31, {20,15,5,4,1}); // attack greedy
    testSet(43, {20,15,5,4,1}); // attack greedy
    testSet(100, {20,15,5,4,1}); // attack bottom up
    testSet(1000, {20,15,5,4,1});  // large n - the slow solvers are skipped
    testSet(1000, {13,7,1});       // large n with awkward denominations
    testSet(10000, {20,15,5,4,1}); // very large n - DP solvers only
}
</pre>The output looks like:<br />
<pre>*** test set for 0 ***
greedy : result:20x0 15x0 5x0 4x0 1x0 count:0
-- time:1852 nSec
naive : result:20x0 15x0 5x0 4x0 1x0 count:0
-- time:572 nSec
pruned : result:20x0 15x0 5x0 4x0 1x0 count:0
-- time:987 nSec
divAndConq: result:20x0 15x0 5x0 4x0 1x0 count:0
-- time:443 nSec
top : result:20x0 15x0 5x0 4x0 1x0 count:0
-- time:3814 nSec
bot : result:20x0 15x0 5x0 4x0 1x0 count:0
-- time:1658 nSec
botUpReach: result:20x0 15x0 5x0 4x0 1x0 count:0
-- time:329 nSec
*** test set for 1 ***
greedy : result:2x0 count:2147483647
-- time:548 nSec
naive : result:2x0 count:2147483647
-- time:388 nSec
pruned : result:2x0 count:2147483647
-- time:563 nSec
divAndConq: result:2x0 count:2147483647
-- time:420 nSec
top : result:2x0 count:2147483647
-- time:2380 nSec
bot : result:2x0 count:2147483647
-- time:1881 nSec
botUpReach: result:2x0 count:2147483647
-- time:3839 nSec
*** test set for 23 ***
greedy : result:20x1 15x0 5x0 4x0 1x3 count:4
-- time:529 nSec
naive : result:20x0 15x1 5x0 4x2 1x0 count:3
-- time:5941762 nSec
pruned : result:20x0 15x1 5x0 4x2 1x0 count:3
-- time:69194 nSec
divAndConq: result:20x0 15x1 5x0 4x2 1x0 count:3
-- time:9601 nSec
top : result:20x0 15x1 5x0 4x2 1x0 count:3
-- time:26959 nSec
bot : result:20x0 15x1 5x0 4x2 1x0 count:3
-- time:19781 nSec
botUpReach: result:20x0 15x1 5x0 4x2 1x0 count:3
-- time:14906 nSec
*** test set for 46 ***
greedy : result:20x2 15x0 5x1 4x0 1x1 count:4
-- time:534 nSec
skipping naive
pruned : result:20x2 15x0 5x1 4x0 1x1 count:4
-- time:798560 nSec
divAndConq: result:20x2 15x0 5x1 4x0 1x1 count:4
-- time:46203 nSec
top : result:20x2 15x0 5x1 4x0 1x1 count:4
-- time:56899 nSec
bot : result:20x2 15x0 5x1 4x0 1x1 count:4
-- time:37923 nSec
botUpReach: result:20x2 15x0 5x1 4x0 1x1 count:4
-- time:32182 nSec
*** test set for 31 ***
greedy : result:20x1 15x0 5x2 4x0 1x1 count:4
-- time:561 nSec
skipping naive
pruned : result:20x0 15x2 5x0 4x0 1x1 count:3
-- time:134656 nSec
divAndConq: result:20x0 15x2 5x0 4x0 1x1 count:3
-- time:16872 nSec
top : result:20x0 15x2 5x0 4x0 1x1 count:3
-- time:38468 nSec
bot : result:20x0 15x2 5x0 4x0 1x1 count:3
-- time:25553 nSec
botUpReach: result:20x0 15x2 5x0 4x0 1x1 count:3
-- time:19032 nSec
*** test set for 43 ***
greedy : result:20x2 15x0 5x0 4x0 1x3 count:5
-- time:480 nSec
skipping naive
pruned : result:20x1 15x1 5x0 4x2 1x0 count:4
-- time:731592 nSec
divAndConq: result:20x1 15x1 5x0 4x2 1x0 count:4
-- time:34070 nSec
top : result:20x1 15x1 5x0 4x2 1x0 count:4
-- time:54569 nSec
bot : result:20x1 15x1 5x0 4x2 1x0 count:4
-- time:35912 nSec
botUpReach: result:20x1 15x1 5x0 4x2 1x0 count:4
-- time:30253 nSec
*** test set for 100 ***
greedy : result:20x5 15x0 5x0 4x0 1x0 count:5
-- time:567 nSec
skipping naive
pruned : result:20x5 15x0 5x0 4x0 1x0 count:5
-- time:7856557 nSec
divAndConq: result:20x5 15x0 5x0 4x0 1x0 count:5
-- time:371883 nSec
top : result:20x5 15x0 5x0 4x0 1x0 count:5
-- time:107881 nSec
bot : result:20x5 15x0 5x0 4x0 1x0 count:5
-- time:79875 nSec
botUpReach: result:20x5 15x0 5x0 4x0 1x0 count:5
-- time:60365 nSec
*** test set for 1000 ***
greedy : result:20x50 15x0 5x0 4x0 1x0 count:50
-- time:728 nSec
skipping naive
skipping pruned
divAndConq: result:20x50 15x0 5x0 4x0 1x0 count:50
-- time:1654224850 nSec
top : result:20x50 15x0 5x0 4x0 1x0 count:50
-- time:838802 nSec
bot : result:20x50 15x0 5x0 4x0 1x0 count:50
-- time:766986 nSec
botUpReach: result:20x50 15x0 5x0 4x0 1x0 count:50
-- time:736739 nSec
*** test set for 1000 ***
greedy : result:13x76 7x1 1x5 count:82
-- time:651 nSec
skipping naive
skipping pruned
divAndConq: result:13x76 7x1 1x5 count:82
-- time:1220761 nSec
top : result:13x76 7x1 1x5 count:82
-- time:704132 nSec
bot : result:13x76 7x1 1x5 count:82
-- time:685573 nSec
botUpReach: result:13x76 7x1 1x5 count:82
-- time:686982 nSec
*** test set for 10000 ***
greedy : result:20x500 15x0 5x0 4x0 1x0 count:500
-- time:559 nSec
skipping naive
skipping pruned
skipping div and conqurer
top : result:20x500 15x0 5x0 4x0 1x0 count:500
-- time:8379256 nSec
bot : result:20x500 15x0 5x0 4x0 1x0 count:500
-- time:7830593 nSec
botUpReach: result:20x500 15x0 5x0 4x0 1x0 count:500
-- time:7763225 nSec
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-29170749907079641422016-07-24T17:42:00.001+09:002016-07-24T17:43:05.713+09:00Google interview: Compute the Fibonacci sequence<pre class="brush:cpp">// Write code that computes the Fibonacci sequence up to n
#include <cstdint>
#include <vector>
#include <iostream>
#include <functional>
#include <chrono>
// Recursive implementations of Fibonacci are all the rage in a teaching
// environment as it demonstrates recursion nicely. The problem is that a recursive
// implementation is a completely inefficient way to code the task in practice.
// Each call of function calls 2 more and those 2 call 2 more each resulting in 4.
// And those 4 call 4 more each resulting in 8 so at this point your space and time
// complexity is 1 + 2 + 4 + 8 + ... which is exponential growth and just awful
// Textbook doubly-recursive Fibonacci: exponential time and stack use, kept
// only as the baseline the dynamic-programming versions are measured against.
uint32_t fib_naive(uint32_t n)
{
    switch (n)
    {
    case 0:  return 0;
    case 1:  return 1;
    default: return fib_naive(n-2) + fib_naive(n-1);
    }
}
// So what google is really testing here is the "dynamic programming" paradigm.
// https://en.wikipedia.org/wiki/Dynamic_programming. Unfortunately the term
// "dynamic programming" has had its meaning diluted and corrupted a bit. In
// this context what it means is the breaking of a larger problem into sub
// parts and solving each sub part to solve the whole. This sounds exactly
// like recursion and it is, the distinction is that recursive implementations
// tend to stop at raw mathematical conversion where as DP implementations look
// at lazy caching results to cut off repeated computations
// There are two textbook approaches to this; top-down and bottom-up
// in the top-down approach you start working from the answer point and compute
// the values of the breakdowns, you deploy memory as needed to cache the
// results of prior computations so that if they are used again you use the
// cached value rather than recomputing it from scratch again
// Memoised Fibonacci worker. The caller must pre-seed cache[0]=0 and
// cache[1]=1; a zero entry at any other index means "not yet computed".
uint32_t fib_top_core(uint32_t n, std::vector<uint32_t>& cache)
{
    // already known (or the n==0 base case, whose cached value is 0)
    if (n == 0 or cache[n] != 0)
        return cache[n];
    cache[n] = fib_top_core(n-2,cache) + fib_top_core(n-1,cache);
    return cache[n];
}
// Top-down (memoised) Fibonacci: allocate a cache of n+1 slots, seed the two
// base cases, and let the recursive worker fill in the rest on demand.
uint32_t fib_top(uint32_t n)
{
    if (n == 0)
        return 0;
    std::vector<uint32_t> memo(n + 1, 0);
    memo[1] = 1;
    return fib_top_core(n, memo);
}
// in the bottom-up approach you start working from the known terminators
// and work your way up to the answer point. You do this by computing
// the values of all the breakdowns you could possibly need and memorize
// these the results for use in the next computations. Often you can optimize
// the memory to forget values that we know we will not revisit
// Bottom-up Fibonacci: iterate from the base cases keeping only the last two
// values, giving O(n) time and O(1) space.
uint32_t fib_bot(uint32_t n)
{
    if (n == 0) return 0;
    uint32_t prev = 0;
    uint32_t cur = 1;
    for (uint32_t step = n; step > 1; --step)
    {
        const uint32_t next = prev + cur;
        prev = cur;
        cur = next;
    }
    return cur;
}
// Time a Fibonacci implementation and print its result and duration.
// BUG FIX: the original stopped the clock only after streaming the result to
// std::cout, so the reported time included console I/O. Compute first, then
// print - matching the coin-change timing harness elsewhere in this file.
void test(std::function<uint32_t(uint32_t)> func, uint32_t n)
{
    auto start = std::chrono::system_clock::now();
    uint32_t result = func(n);
    auto end = std::chrono::system_clock::now();
    std::cout << "result:" << result;
    std::cout << " -- time:"
              << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
              << "nSec\n";
}
// Compare the three Fibonacci implementations at n = 40: large enough to
// make the exponential naive version visibly slow, small enough to finish
// and stay within uint32_t.
int main()
{
    int n = 40;
    test(fib_naive,n);
    test(fib_top ,n);
    test(fib_bot ,n);
}
</pre><br />
and the output looks like:<br />
<pre>result:102334155 -- time:1282786361 nSec
result:102334155 -- time:51012 nSec
result:102334155 -- time:690 nSec
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-86765718089311160962016-07-20T23:44:00.003+09:002016-07-21T22:01:02.710+09:00google interview: find number in multi disk list better than O(logn)<br />
<pre class="brush:cpp">// http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer
//
// Find or determine non existence of a number in a sorted list
// of N numbers where the numbers range over M, M>> N and N large
// enough to span multiple disks. Algorithm to beat O(log n) bonus
// points for constant time algorithm.
// the const time algo is called a "bloom filter" if you work with large
// databases you (hopefully) learn this.. what a bloom filter does on insert
// is hash your key in 3 ways and then light up flags in 3 indexes.. on lookup
// it hashes again and checks the 3 places.. if any one of the flags is off
// then you have an item that doesn't exist.. otherwise commence your normal
// logN search
#include <iostream>
#include <vector>
#include <stdint.h>
#include <memory>
#include <cstring>
#include <cstdlib>
#include <chrono>
#define BLOOMFILTER
// A sorted container whose keys are spread across fixed-size "pages" (each
// standing in for a disk-sized chunk), fronted by an optional bloom filter
// so keys that were never inserted can be rejected in O(1) before any real
// searching happens. In-page lookup uses interpolation splitting, which is
// what buys the better-than-O(log n) average behaviour the question asks for.
class PagedVector
{
public:
    typedef uint64_t Key; // ideally this would be a template param
    typedef std::vector<Key> Keys;
    typedef std::shared_ptr<Keys> KeysHandle;
    typedef std::vector<KeysHandle> PagedKeys;
    const uint32_t filterSize = 256; // ideally this would be tunable to reduce misses/mem use
    const uint32_t filterMask = filterSize - 1;
private:
    std::size_t threashold_;         // max keys a page may hold before insert splits it
    PagedKeys pages_;                // ordered pages, each an ordered run of keys
    std::vector<bool> presence_[3];  // the bloom filter (3 independent hash lanes)
    // The divide step of a divide-and-conquer search: shrink [a,b) to the
    // side that must hold the target, as judged by the comparer.
    template <typename Iterator,
              typename Compare,
              typename Splitter>
    struct Divider
    {
        Compare& comparer_;
        Splitter& splitter_;
        Divider(Compare& comparer,
                Splitter& splitter) :
            comparer_(comparer),
            splitter_(splitter)
        {}
        void operator()(Iterator& a,
                        Iterator& b)
        {
            // should never happen unless we are empty..
            if (b==a) return;
            Iterator n = splitter_(a,b);
            if (comparer_(n))
            {
                b = n;
                return;
            }
            a = n + 1;
        }
    };
    // Generic search driver: repeatedly divide [begin,end) until it
    // collapses to a single point - the match, or the insertion point when
    // the target is absent. Used for both page-level and in-page searches.
    template <typename Iterator,
              typename Compare,
              typename Splitter>
    Iterator search(Iterator begin,
                    Iterator end,
                    Compare comparer,
                    Splitter splitter)
    {
        // renamed from the original shadowing local `Divider`
        Divider<Iterator, Compare, Splitter> divider(comparer, splitter);
        // narrow the sub range until only one candidate position remains
        while (1)
        {
            divider(begin,end);
            if (end == begin)
                return begin;
        }
    }
    // Comparer for the page-level search: could this page contain target?
    struct FindInPage
    {
        Key target_;
        FindInPage(Key target) :
            target_(target)
        {}
        bool operator()(const PagedKeys::iterator& a)
        {
            // a page covers a range [first,last] of keys, so the divide
            // test must be "target <= the page's largest key" for the search
            // to land on the page that would contain it
            const KeysHandle& pageHandle = *a;
            if (pageHandle->size() > 0)
            {
                return target_ <= (*pageHandle)[pageHandle->size()-1];
            }
            return true; // empty... should be the last if it exists..
        }
    };
    // Splitter for the page-level search: plain halving.
    struct SplitPage
    {
        SplitPage()
        {}
        PagedKeys::iterator operator()(const PagedKeys::iterator& a,
                                       const PagedKeys::iterator& b)
        {
            std::size_t offset = (b-a)/2;
            return a + offset;
        }
    };
    // Comparer for the in-page search.
    struct FindInVector
    {
        Key target_;
        FindInVector(Key target) :
            target_(target)
        {}
        bool operator()(const Keys::iterator& a)
        {
            return target_ <= (*a);
        }
    };
    // Splitter for the in-page search: interpolation split - jump to where a
    // uniformly distributed key would sit rather than always halving.
    struct SplitVector
    {
        Key target_;
        SplitVector(Key target) :
            target_(target)
        {}
        Keys::iterator operator()(const Keys::iterator& a,
                                  const Keys::iterator& b)
        {
            // tiny ranges: fall back to halving
            if ((b-a) <= 2)
            {
                std::size_t offset = (b-a)/2;
                return a + offset;
            }
            Key high = *(b-1);
            Key low = *a;
            // the target may sit outside the range's values - clamp
            if (target_ <= low) return a;
            if (target_ >= high) return b-1;
            double splitPercent;
            splitPercent = static_cast<double>(target_ - low) / static_cast<double>(high-low);
            splitPercent = (splitPercent < 0.0) ? 0.0 : splitPercent;
            std::size_t range = (b-a);
            std::size_t offset = range * splitPercent;
            return a + offset;
        }
    };
    // Locate the page and in-page position where target is, or where it
    // should be inserted. NOTE: iter.second may be the page's end() when
    // target is greater than every key in the chosen page.
    std::pair<PagedKeys::iterator, Keys::iterator> find(Key target)
    {
        std::pair<PagedKeys::iterator, Keys::iterator> iter;
        iter.first = search(pages_.begin(),
                            pages_.end(),
                            FindInPage(target),
                            SplitPage());
        // target beyond every page: it belongs in (or after) the last page
        if (iter.first == pages_.end())
            iter.first = pages_.end() - 1;
        KeysHandle& page = *(iter.first);
        // find the location to add into the list
        iter.second = search(page->begin(),
                             page->end(),
                             FindInVector(target),
                             SplitVector(target));
        return iter;
    }
public:
    // threashold: number of keys a page may hold before insert splits it.
    PagedVector(std::size_t threashold) :
        threashold_(threashold),
        pages_(),
        presence_()
    {
        for (int i = 0; i < 3; ++i)
            presence_[i].resize(filterSize);
        // always keep at least one (possibly empty) page so find() never has
        // to deal with a pageless container
        KeysHandle newPage(new Keys);
        pages_.push_back(newPage);
    }
    // Insert target at its sorted position, splitting the receiving page in
    // half when it grows past the threshold.
    void insert(Key target)
    {
#ifdef BLOOMFILTER
        // not the best way but assume we are doing a item by item insert
        uint64_t hash = std::hash<Key>()(target);
        // light the bloom filter's presence bits according to the hash
        presence_[0][ hash & filterMask] = true;
        presence_[1][(hash >> 8) & filterMask] = true;
        presence_[2][(hash >> 16) & filterMask] = true;
#endif
        // find the relevant page and in-page insertion point
        std::pair<PagedKeys::iterator, Keys::iterator> iter = find(target);
        KeysHandle& page = *(iter.first);
        page->insert(iter.second, target);
        // then threshold check.
        if (page->size() > threashold_)
        {
            // over the threshold: split the page in half
            KeysHandle newPage(new Keys);
            uint32_t spliceAt = page->size() / 2;
            uint32_t sizeAfter = page->size() - spliceAt;
            newPage->resize(sizeAfter);
            std::memcpy(&((*newPage)[0]), &((*page)[spliceAt]), sizeAfter * sizeof(Key));
            page->resize(spliceAt);
            pages_.insert(iter.first + 1, newPage);
        }
    }
    // True when target was previously inserted.
    bool exists(Key target)
    {
#ifdef BLOOMFILTER
        uint64_t hash = std::hash<Key>()(target);
        // a quick O(1) pre check before the more costly real check
        if (not (presence_[0][ hash & filterMask] and
                 presence_[1][(hash >> 8) & filterMask] and
                 presence_[2][(hash >> 16) & filterMask]))
            return false;
#endif
        // ok technically the question was about this.. so no std:: here..
        // basically have to implement a better than log(N) search
        std::pair<PagedKeys::iterator, Keys::iterator> iter = find(target);
        // BUG FIX: find() yields the page's end() when target is larger than
        // every key in the chosen page (including the empty-container case);
        // dereferencing that iterator was undefined behaviour. Such a target
        // is simply not present.
        const KeysHandle& page = *(iter.first);
        if (iter.second == page->end())
            return false;
        return *(iter.second) == target;
    }
    const PagedKeys& pages() const
    {
        return pages_;
    }
    // Dump every page's keys (one banner line per page) - debug aid.
    void print(std::ostream& os) const
    {
        for ( KeysHandle page : pages_)
        {
            os << "\n ########################################## \n";
            for ( Key item : *page)
            {
                os << " " << item;
            }
        }
    }
};
std::ostream& operator<<(std::ostream& os, const PagedVector& list)
{
list.print(os);
return os;
}
// Walk every page in order and confirm the keys form a non-decreasing
// sequence across the whole structure; prints "passed" or "FAILED".
void validate_sorted(PagedVector& theList)
{
    bool ordered = true;
    bool havePrev = false;
    PagedVector::Key prev = 0;
    for ( PagedVector::KeysHandle page : theList.pages() )
    {
        for ( PagedVector::Key item : *page)
        {
            if (havePrev and prev > item)
                ordered = false;
            prev = item;
            havePrev = true;
        }
    }
    std::cout << (ordered ? "passed" : "FAILED")
              << "\n";
}
// Stress test: insert one million random even keys, then probe one million
// random keys drawn from a range twice as wide (so roughly half the lookups
// should hit). Prints timings for both phases, the hit count, and finally
// validates that the structure stayed sorted.
void test_random()
{
    PagedVector theList(5000); // split pages once they exceed 5000 keys
    auto istart = std::chrono::system_clock::now();
    for (int i = 0; i < 1000000; ++i)
    {
        // even keys only, so odd probes below are guaranteed misses
        PagedVector::Key item = (std::rand() % 100000)*2;
        // std::cout << "adding:" << item << "\n";
        theList.insert(item);
    }
    auto iend = std::chrono::system_clock::now();
    std::cout << "inserted time:"
              << std::chrono::duration_cast<std::chrono::milliseconds>(iend - istart).count()
              << "mSec \n";
    // std::cout << theList << "\n";
    int count = 0;
    int exist = 0;
    auto cstart = std::chrono::system_clock::now();
    for (int i = 0; i < 1000000; ++i)
    {
        PagedVector::Key item = std::rand() % 200000;
        if (theList.exists(item))
            ++exist;
        ++count;
        // std::cout << " " << item << " "
        // << (theList.exists(item) ? "exists" : "missing")
        // << "\n";
    }
    auto cend = std::chrono::system_clock::now();
    std::cout << "checking time:"
              << std::chrono::duration_cast<std::chrono::milliseconds>(cend - cstart).count()
              << "mSec\n\n";
    // expecting a 50% hit rate
    std::cout << "checked:"
              << count
              << " exist:" << exist
              << "\n";
    validate_sorted(theList);
}
// Entry point: run the random insert/lookup stress test.
int main()
{
    test_random();
}
</pre>Output with a bloomfilter is:<br />
<pre>inserted time:2613mSec
checking time:735mSec
checked:1000000 exist:499965
passed
</pre><br />
The output without the filter is<br />
<pre>inserted time:2584mSec
checking time:1292mSec
checked:1000000 exist:499965
passed
</pre><br />
Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-3138392343540901472016-07-20T21:18:00.002+09:002016-07-20T21:40:44.519+09:00Google interview: implement sort X and discuss its space/time complexityThe problem with sort algorithms is there are many tricks to making them quicker. These tricks also make it more complex to do in an interview. <br />
<br />
I have failed a google interview in the past because of this, at that time my interviewer asked me to code a *stable* quick sort on the white board.. The moment it occurred to me just how tricky the code would be to do on a white board I simply froze up and promptly forgot everything.. I went down the rabbit hole so to speak, worst interview ever.. <br />
<br />
So I have tried to keep the algorithms as simple as possible. My tactic here is to break them into an easy-to-remember set of sub-operations. They are not optimal versions. <br />
<br />
<pre class="brush:cpp">#include <iostream>
#include <vector>
#include <functional>
// ***********************************************
// ************** ITERATIVE SORTS ****************
// ***********************************************
// Insertion sort: grow a sorted prefix one element at a time by sliding each
// new element down into its place.
//
// in-place (aside from the swap temporary)
// stable - equal keys never swap past each other (the slide stops at equals)
// best case O(n): already sorted, nothing moves
// average case O(n^2): some fraction of the inner slide runs each pass
// worst case O(n^2): reversed input, everything moves
void insertionSort(std::vector<int>& list)
{
    for (std::size_t cur = 1; cur < list.size(); ++cur)
    {
        // slide list[cur] down until the element to its left is not larger
        std::size_t pos = cur;
        while (pos > 0 and list[pos-1] > list[pos])
        {
            const int held = list[pos];
            list[pos] = list[pos-1];
            list[pos-1] = held;
            --pos;
        }
    }
}
// Selection sort: for each slot find the smallest remaining element and swap
// it into place.
//
// in-place
// NOT stable as written (the original comment claimed stability): although
// the first of equal minima is selected, the swap can carry the displaced
// element past keys equal to it, e.g. [2a,2b,1] -> [1,2b,2a]. With plain
// ints the difference is unobservable, but the claim would be wrong for
// keyed records.
// best / average / worst case all O(n^2): every pass scans the whole tail
// regardless of input order.
void selectionSort(std::vector<int>& list)
{
    for (std::size_t slot = 0; slot < list.size(); ++slot)
    {
        // find the first minimum of the unsorted tail [slot, end)
        std::size_t best = slot;
        for (std::size_t scan = slot; scan < list.size(); ++scan)
        {
            if (list[scan] < list[best])
                best = scan;
        }
        if (best != slot)
        {
            const int held = list[slot];
            list[slot] = list[best];
            list[best] = held;
        }
    }
}
// ***********************************************
// *********** DIVIDE AND CONQUER SORTS **********
// ***********************************************
// Exchange the ints referenced by two vector iterators.
void swap(std::vector<int>::iterator lhs,
          std::vector<int>::iterator rhs)
{
    const int held = *lhs;
    *lhs = *rhs;
    *rhs = held;
}
// Quicksort over the half-open range [begin, end) using a mid-point pivot
// and in-place partitioning (element exchanges written inline).
//
// in-place
// unstable - partitioning swaps elements over long distances
// average / best case O(n log n); worst case O(n^2) when the pivot value
// splits the range badly every level
void quickSort(std::vector<int>::iterator begin,
               std::vector<int>::iterator end)
{
    // 0 or 1 elements are already sorted
    if ((end - begin) < 2) return;
    // take the middle element's value as the pivot
    std::vector<int>::iterator mid = begin + (end - begin)/2;
    const int pivot = *mid;
    std::vector<int>::iterator lo = begin;
    std::vector<int>::iterator hi = end - 1;
    // park the pivot element at the far end; leaving it inside the partition
    // range can cause an infinite loop when partitioning fails to split
    int held = *mid;
    *mid = *hi;
    *hi = held;
    --hi;
    // partition: grow the <= pivot region from the left, dumping larger
    // elements at the right
    while (lo != hi)
    {
        if (*lo <= pivot)
        {
            ++lo;
        }
        else
        {
            held = *lo;
            *lo = *hi;
            *hi = held;
            --hi;
        }
    }
    // the meeting element still needs classifying
    if (*lo <= pivot) ++lo;
    // restore the pivot between the two partitions
    held = *lo;
    *lo = *(end - 1);
    *(end - 1) = held;
    // recurse on both sides, excluding the pivot element itself so progress
    // is guaranteed
    quickSort(begin, lo);    // [begin, lo)
    quickSort(lo + 1, end);  // (lo, end)
}
// Rotate [begin,end) so the block [mid,end) moves in front of [begin,mid),
// preserving the internal order of both blocks (element exchanges written
// inline). Returns the new position of the boundary, i.e. where the old
// [begin,mid) block now starts.
std::vector<int>::iterator rotate(std::vector<int>::iterator begin,
                                  std::vector<int>::iterator mid,
                                  std::vector<int>::iterator end)
{
    std::vector<int>::iterator left = begin;
    std::vector<int>::iterator right = mid;
    while (true)
    {
        const int held = *left;
        *left = *right;
        *right = held;
        ++left;
        ++right;
        if (left >= mid and right == end) break;
        // the right block wrapped - keep cycling it until the left is done
        if (right == end) right = mid;
    }
    return begin + (end - mid);
}
// Stable partition of [cursor, end): rearranges the range so every element
// for which op(x) is true precedes every element for which it is false,
// preserving the relative order inside both groups (done via block rotates
// rather than long-range swaps). Returns an iterator to the first "false"
// element - the boundary between the two groups.
std::vector<int>::iterator stablePartition(std::vector<int>::iterator cursor,
                                           std::vector<int>::iterator end,
                                           std::function<bool (int)> op)
{
    // Note "op" will be true if its in the left side
    // move through the data until we find the end of the *first* left
    // section
    while (cursor < end and op(*cursor)) ++cursor;
    // record the start of the right section
    std::vector<int>::iterator rightStart = cursor;
    // find the end of the right section and the start of out of order
    // left section
    while (cursor < end and not op(*cursor)) ++cursor;
    while (cursor < end)
    {
        // if we are not at the end of the entire range then the op-false
        // section has to be *out of place*
        // So record the end of the out of place right section
        std::vector<int>::iterator rightEnd = cursor;
        // find end of the out of place left section
        while (cursor < end and op(*cursor)) ++cursor;
        // then rotate the out of order left section in front of the right
        // section, and update the rightStart to the new divide point
        rightStart = rotate(rightStart, rightEnd, cursor);
        // then move the cursor to the new end of the right section, starting
        // from the end of the prior searched region
        while (cursor < end and not op(*cursor)) ++cursor;
    }
    // return the divide point between the left and right sections
    return rightStart;
}
// Stable quicksort over [begin, end): divide and conquer using *stable*
// partitioning of the data.
//
// The trick is that sequences are rotated into place instead of swapping the
// two ends, which keeps equal elements in their original order. To make the
// pivot handling stable (and to avoid infinite loops when partitioning
// fails) the range is split into THREE sections - lessThan, equalTo and
// greaterThan - and only the outer two are recursed on; the equalTo section
// is already final.
//
// (I have been asked for exactly this in a Google interview before.. and
// failed it.)
//
// inplace
// stable
// -- the use of rotates moves whole sequences, keeping relative order
// -- the three-way split keeps the pivot group stable too, and excluding
//    the equalTo section from recursion guarantees progress
// average case O(log(n) log(n) n)
// -- http://www.codeproject.com/Articles/26048/Fastest-In-Place-Stable-Sort
// worst case O(n^3) (i think)
// -- a bad pivot can split everything to one side giving N recursions; each
//    partition is N and may repeatedly call rotate, itself worst-case N,
//    hence N*N*N
void stableQuickSort(std::vector<int>::iterator begin,
                     std::vector<int>::iterator end)
{
    // a list of less than 2 items is sorted by default
    if ((end - begin) < 2) return;
    // choose pivot.. mid point...
    std::vector<int>::iterator p = begin + (end - begin)/2;
    // record the pivot value..
    int pivot = *(p);
    // form the lessThan and greaterThanEqualTo sections
    std::vector<int>::iterator greaterThanEqualToStart
        = stablePartition(begin,
                          end,
                          [pivot](int v) { return v < pivot; });
    // form the equalTo and greaterThan sections
    std::vector<int>::iterator greaterThanStart
        = stablePartition(greaterThanEqualToStart,
                          end,
                          [pivot](int v) { return not (pivot < v); });
    // now recurse on the unhandled lessThan and greaterThan partitions *only*
    stableQuickSort(begin, greaterThanEqualToStart);
    stableQuickSort(greaterThanStart, end);
}
// Classic top-down merge sort over [begin, end), using `tmp` as scratch
// space for the merge step; `tmp` must be at least as large as the range.
//
// Properties: stable (ties come from the left half first);
// O(n log n) best/average/worst; O(n) auxiliary space via `tmp`.
void mergeSort(std::vector<int>::iterator begin,
               std::vector<int>::iterator end,
               std::vector<int>& tmp)
{
    if (end - begin < 2) return;
    // sort each half independently
    std::vector<int>::iterator mid = begin + (end - begin) / 2;
    mergeSort(begin, mid, tmp);
    mergeSort(mid, end, tmp);
    // merge into tmp, stopping the moment the left half is exhausted --
    // whatever remains of the right half is already in its final slots
    std::vector<int>::iterator lhs = begin;
    std::vector<int>::iterator rhs = mid;
    std::vector<int>::iterator out = tmp.begin();
    while (lhs != mid)
    {
        // take from the right only while it exists and is strictly smaller;
        // ties fall through to the left side, which keeps the sort stable
        if (rhs != end and *rhs < *lhs)
            *out++ = *rhs++;
        else
            *out++ = *lhs++;
    }
    // copy the merged prefix back; anything past it never left its place
    for (std::vector<int>::iterator src = tmp.begin(); src != out; ++src)
        *begin++ = *src;
}
// In-place merge sort: instead of merging into a scratch buffer,
// out-of-order runs from the right half are rotated in front of the left
// half, trading extra element moves for O(1) auxiliary space.
//
// Properties: in-place, stable, average O(n log^2 n)
// (http://www.codeproject.com/Articles/26048/Fastest-In-Place-Stable-Sort)
//
// FIX: the original evaluated `*right < *left` BEFORE the bounds check in
// both inner loops, dereferencing `right` after it reached `end` (UB on a
// past-the-end iterator); the outer loop could likewise hit `*end` once the
// right run was fully consumed.  Bounds checks now short-circuit first.
void inplaceMergeSort(std::vector<int>::iterator begin,
                      std::vector<int>::iterator end)
{
    if (end - begin < 2) return;
    // sort the two halves
    std::vector<int>::iterator n = begin + (end - begin)/2;
    inplaceMergeSort(begin, n);
    inplaceMergeSort(n, end);
    // merge by rotating runs of the right half in front of the left half
    std::vector<int>::iterator left = begin;
    std::vector<int>::iterator right = n;
    // once right hits end, everything remaining on the left is in place
    while (left < right and right != end)
    {
        // skip the already-ordered prefix of the left section
        while (left < right and not (*right < *left)) ++left;
        if (not (left < right)) break;
        // the out-of-order run in the right section starts here
        std::vector<int>::iterator mid = right;
        // find the end of the run that must move in front of `left`
        while (right != end and *right < *left) ++right;
        if (mid == right) break;
        // rotate [mid, right) in front of [left, mid); `rotate` returns the
        // new boundary just past the moved-in run
        left = rotate(left, mid, right);
    }
}
// ***********************************************
// ****************** HEAP SORT ******************
// ***********************************************
// Iterator to a node's parent in the implicit binary heap laid out over the
// vector: for node index i (i > 0) the parent is at index (i - 1) / 2.
std::vector<int>::iterator parent(std::vector<int>::iterator begin,
                                  std::vector<int>::iterator i)
{
    const std::vector<int>::difference_type node = i - begin;
    return begin + (node - 1) / 2;
}
// Iterator to a node's first (left) child in the implicit binary heap:
// for node index i the children are at indices 2*i + 1 and 2*i + 2.
std::vector<int>::iterator child(std::vector<int>::iterator begin,
                                 std::vector<int>::iterator i)
{
    const std::vector<int>::difference_type node = i - begin;
    return begin + 2 * node + 1;
}
// Sift the element at `i` up toward the root until the max-heap property
// (parent >= child) holds along its path.  Note an *equal* parent is still
// swapped -- wasteful but harmless, since a binary heap is unstable anyway.
void upShift(std::vector<int>::iterator begin,
             std::vector<int>::iterator i)
{
    while (i != begin)
    {
        std::vector<int>::iterator up = parent(begin, i);
        if (*up > *i) return;  // strictly larger parent: heap is valid here
        swap(up, i);           // file-local iterator-based swap helper
        i = up;
    }
}
// Build a max-heap over [begin, end) top-down: grow the heap one element at
// a time, sifting each newcomer up into place.  This is the O(n log n)
// build; a bottom-up sift-down build would be O(n) but is not used here.
void makeHeap(std::vector<int>::iterator begin,
              std::vector<int>::iterator end)
{
    // 0 or 1 elements already form a heap
    if (end - begin < 2) return;
    for (std::vector<int>::iterator next = begin + 1; next != end; ++next)
        upShift(begin, next);
}
// Restore the max-heap property over [begin, end) assuming only the root
// may be out of place: sift the root down, always descending into the
// larger child.
void reheap(std::vector<int>::iterator begin,
            std::vector<int>::iterator end)
{
    std::vector<int>::iterator node = begin;
    for (;;)
    {
        std::vector<int>::iterator lhs = child(begin, node);
        if (not (lhs < end)) return;  // leaf: nothing below to compare
        std::vector<int>::iterator rhs = lhs + 1;
        // pick the larger child (the right child may not exist)
        std::vector<int>::iterator big
            = (rhs < end and *lhs < *rhs) ? rhs : lhs;
        if (not (*big > *node)) return;  // children not larger: heap is valid
        swap(big, node);
        node = big;
    }
}
// Heap sort: selection sort's idea (put the right element at the current
// position) made fast by first arranging the unsorted data as a max-heap,
// so the largest remaining element is always at the root.
//
// Properties: in-place; unstable (heap operations do not preserve the order
// of equal keys); O(n log n) -- the heap build here is itself O(n log n).
void heapSort(std::vector<int>::iterator begin,
              std::vector<int>::iterator end)
{
    if (end - begin < 2) return;
    makeHeap(begin, end);
    // peel the maximum off the shrinking heap one element at a time; the
    // element finally left at `begin` is necessarily the smallest
    for (std::vector<int>::iterator back = end - 1; back != begin; --back)
    {
        swap(back, begin);    // current maximum goes to the sorted tail
        reheap(begin, back);  // repair the heap over what remains
    }
}
// ***********************************************
// ************** OVERLOAD WRAPPERS **************
// ***********************************************
// should be a real overload but std::functional cant seem to match the
// overload requested
// std::function-friendly adapter: binds quickSort's iterator interface to a
// whole-vector call (a real overload set fails to convert into the
// std::function parameter of test_one).
void quickSortWrap(std::vector<int>& list)
{
quickSort(list.begin(), list.end());
}
// std::function-friendly adapter for stableQuickSort; sorts in place.
void stableQuickSortWrap(std::vector<int>& list)
{
stableQuickSort(list.begin(), list.end());
}
// std::function-friendly adapter for mergeSort: allocates the O(n) scratch
// buffer the merge step needs, then sorts in place.
void mergeSortWrap(std::vector<int>& list)
{
    std::vector<int> scratch(list.size());
    mergeSort(list.begin(), list.end(), scratch);
}
// std::function-friendly adapter for inplaceMergeSort; sorts in place.
void inplaceMergeSortWrap(std::vector<int>& list)
{
inplaceMergeSort(list.begin(), list.end());
}
// std::function-friendly adapter for heapSort; sorts in place.
void heapSortWrap(std::vector<int>& list)
{
heapSort(list.begin(), list.end());
}
// ***********************************************
// ******************** TESTING ******************
// ***********************************************
// Verify that `list` is in non-decreasing order, echoing the list with a
// '*' printed in front of any element smaller than its predecessor,
// followed by " :passed" or " :FAILED".
//
// Returns true when the list is sorted.
// FIX: the original was declared bool but had NO return statement --
// falling off the end of a value-returning function is undefined behaviour.
bool check(std::vector<int>& list)
{
    bool failed = false;
    bool first = true;
    int prev = 0;  // initialized to silence warnings; guarded by `first`
    for (int i : list)
    {
        if (first)
        {
            first = false;  // no predecessor to compare the first item with
        }
        else if (i < prev)
        {
            std::cout << "*";  // flag the out-of-order element
            failed = true;
        }
        prev = i;
        std::cout << i << " ";
    }
    std::cout << " :" << (failed ? "FAILED" : "passed") << "\n";
    return not failed;
}
// Exercise the heap index helpers (parent/child) and makeHeap on a copy of
// `data`, printing passed/FAILED diagnostics for each check.
//
// Returns 0 when the aggregated soft checks pass, 1 otherwise.
// FIX: the original was declared int but had NO return statement (UB), and
// compared a signed loop counter against data.size().
int testHeap(std::vector<int> data)
{
    bool anyFailed = false;
    // check indexing
    if (data.size() > 1)
    {
        // some hard checks: spot-verify the first few parent links
        // index 1 -> parent 0
        if (data.size() > 1)
            std::cout << " parent1: "
                      << ((parent(data.begin(), data.begin()+1) == data.begin()) ?
                          "passed" : "FAILED")
                      << "\n";
        // index 2 -> parent 0
        if (data.size() > 2)
            std::cout << " parent2: "
                      << ((parent(data.begin(), data.begin()+2) == data.begin()) ?
                          "passed" : "FAILED")
                      << "\n";
        // index 3 -> parent 1
        if (data.size() > 3)
            std::cout << " parent3: "
                      << ((parent(data.begin(), data.begin()+3) == (data.begin()+1)) ?
                          "passed" : "FAILED")
                      << "\n";
        // index 4 -> parent 1
        if (data.size() > 4)
            std::cout << " parent4: "
                      << ((parent(data.begin(), data.begin()+4) == (data.begin()+1)) ?
                          "passed" : "FAILED")
                      << "\n";
        // the soft general check: every node must be one of its parent's
        // two children, otherwise parent/child do not invert each other
        bool failed = false;
        for (std::vector<int>::iterator i = data.begin() + 1;
             i != data.end();
             ++i)
        {
            std::vector<int>::iterator p = parent(data.begin(), i);
            std::vector<int>::iterator kid1 = child(data.begin(), p);
            std::vector<int>::iterator kid2 = kid1 + 1;
            bool bad = (kid1 != i) and (kid2 != i);
            if (bad)
                std::cout << " indexing bad at"
                          << " i:" << (i-data.begin())
                          << " p:" << (p-data.begin())
                          << " k1:" << (kid1-data.begin())
                          << " k2:" << (kid2-data.begin())
                          << "\n";
            failed |= bad;
        }
        std::cout << " general heap indexing:" << (failed ? "FAILED" : "passed") << "\n";
        anyFailed |= failed;
    }
    // check heap making: after makeHeap every child must be <= its parent
    makeHeap(data.begin(),
             data.end());
    bool failed = false;
    if (data.size() > 0)
        std::cout << " # " << data[0];
    // size_type avoids the signed/unsigned comparison of the original
    for (std::vector<int>::size_type i = 1; i < data.size(); ++i)
    {
        std::vector<int>::size_type p = (i-1)/2;  // parent index in the heap
        bool bad = (data[i] > data[p]);
        failed |= bad;
        // '|' marks an element bigger than its parent (heap violation)
        std::cout << (bad ? "|" : " ") << data[i];
    }
    std::cout <<" " << (failed ? "FAILED" : "passed")
              << "\n";
    anyFailed |= failed;
    return anyFailed ? 1 : 0;
}
// Smoke-test rotate(): rotate a known sequence at two different pivots,
// print the results, and report passed/FAILED for each.
//
// Returns 0 when both rotations match the expected output, 1 otherwise.
// FIX: the original was declared int but had NO return statement (UB).
int testRotate()
{
    bool ok = true;
    std::vector<int> data({1,2,3,4,5,6,7,8});
    // rotate the first two elements around to the back
    std::vector<int> data1(data);
    rotate(data1.begin(),
           data1.begin()+2,
           data1.end());
    for (std::vector<int>::iterator a = data1.begin();
         a != data1.end();
         ++a)
        std::cout << " " << *a;
    std::cout << "\n";
    const bool pass1 = (data1 == std::vector<int>({3,4,5,6,7,8,1,2}));
    std::cout << (pass1 ? "passed" : "FAILED")
              << "\n";
    ok = ok and pass1;
    // rotate the last two elements around to the front
    std::vector<int> data2(data);
    rotate(data2.begin(),
           data2.begin()+6,
           data2.end());
    for (std::vector<int>::iterator a = data2.begin();
         a != data2.end();
         ++a)
        std::cout << " " << *a;
    std::cout << "\n";
    const bool pass2 = (data2 == std::vector<int>({7,8,1,2,3,4,5,6}));
    std::cout << (pass2 ? "passed" : "FAILED")
              << "\n";
    ok = ok and pass2;
    return ok ? 0 : 1;
}
// Run one sort implementation over a copy of `data` and print the result
// via check().  Returns 0 (the value was previously garbage).
// FIX: the original was declared int but had NO return statement (UB).
int test_one(std::vector<int> data,
             std::function<void (std::vector<int>&)> sort)
{
    std::vector<int> list(data);  // sort a copy so `data` stays reusable
    sort(list);
    check(list);
    return 0;
}
void test(std::initializer_list<int> data)
{
std::cout << "testing heap parts\n";
testHeap(data);
std::cout << "testing sorts\n";
test_one(data,insertionSort);
test_one(data,selectionSort);
test_one(data,mergeSortWrap);
test_one(data,inplaceMergeSortWrap);
test_one(data,quickSortWrap);
test_one(data,stableQuickSortWrap);
test_one(data,heapSortWrap);
}
// Entry point: sanity-check rotate() and the checker itself, then run the
// full sort suite over a set of edge-case inputs (all-equal data is the
// classic naive-quick-sort infinite-loop breaker).
int main()
{
std::cout << "testing rotate\n";
testRotate();
std::cout << "test checker\n";
// feed check() an unsorted list so its failure path is exercised too
std::vector<int> list1({6,7,8,8,6,5});
check(list1);
std::cout << "testing sets\n";
// empty and single-element sets exercise the recursion base cases
test({});
test({1});
test({1,1}); // sort breaker
test({1,2});
test({2,1});
test({6,7,8,8,6,5});
test({2,2,2,2,2,2,2,2,2,2,2,2,2}); // sort breaker...
test({3,5,2,6,8,3,2,6,5,3,4,8,7});
}
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-32662495506444577272016-07-02T22:26:00.001+09:002016-07-02T22:30:29.313+09:00google interview: uniform sampling from an infinite stream<pre class="brush:cpp">//http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer
// You have a stream of infinite queries (ie: real time Google search queries
// that people are entering). Describe how you would go about finding a good
// estimate of 1000 samples from this never ending set of data and then write
// code for it.
#include <iostream>
#include <iomanip>
#include <vector>
#include <ctime>
// Reservoir Sampling
// did it here http://code-slim-jim.blogspot.jp/2010/06/reservoir-sampling.html
// but wow 2010 is so long ago..
// ok so the explanation:
// you have stream and u need to sample from N "good" items it.. assumably
// "good" means uniform here so until you have the first N items u just take
// every thing
//
// now the tricky part when u get the N+1 item each item is therefore supposed
// to have a N/(N+1) probability of being in the output so the new item has a
// N/(N+1) chance of selection and all the existing items have a 1/N chance of
// begin rejected..
//
// then we can generalize to the (N+x)-th sample. when u get the N+x item each
// item is therefore supposed to have a N/(N+x) probability of being in the
// output so the new item has a N/(N+x) chance of selection and all the
// existing items have a 1/N chance of begin rejected
// Uniform ("reservoir") sampling of up to `size` items from a stream of
// unknown -- possibly unbounded -- length.
//
// The first `size` items are taken verbatim.  After that, the k-th item
// seen (k = count_) is accepted with probability size/k and, when accepted,
// overwrites a uniformly chosen slot.  Inductively this keeps every item
// seen so far equally likely to be in the reservoir.
//
// FIX/generalization: sample() now takes a const reference, so temporaries
// and const values can be sampled too; the original non-const reference
// rejected rvalues for no benefit (the item is only copied from).
template <typename Type>
class ReservoirSample
{
public:
    typedef std::vector<Type> Samples;
private:
    std::size_t size_;           // reservoir capacity
    std::size_t count_;          // total items offered so far
    std::vector<Type> samples_;  // the current reservoir contents
public:
    ReservoirSample(unsigned int size) :
        size_(size),
        count_(0),
        samples_()
    {}
    // Pseudo-random float in [0, 1) with a granularity of 1/1000.
    float rand()
    {
        return static_cast<float>(std::rand() % 1000) / 1000.0;
    }
    // Offer the next stream item to the reservoir.
    void sample(const Type& item)
    {
        count_ += 1;
        if (samples_.size() < size_)
        {
            // still filling the reservoir: take everything
            samples_.push_back(item);
        }
        else
        {
            // accept with probability size/count ...
            float chance = static_cast<float>(size_)/static_cast<float>(count_);
            if (rand() < chance)
            {
                // ... and evict a uniformly random current occupant
                std::size_t idx = size_ * rand();
                samples_[idx] = item;
            }
        }
    }
    // The current reservoir (fewer than `size` items until the stream has
    // supplied that many).
    std::vector<Type>& samples()
    {
        return samples_;
    }
};
// Statistically verify the sampler: repeatedly draw `selection` samples
// from a stream of `streamSize` items and histogram which items were
// chosen.  A correct reservoir sampler gives every item the same expected
// frequency (testing randomness means either de-randomizing it or checking
// the distribution -- this does the latter).
void test(int selection,
          int streamSize)
{
    const int cycles = 100000;
    std::vector<int> freq(streamSize, 0);
    for (int cycle = 0; cycle < cycles; ++cycle)
    {
        ReservoirSample<int> sampler(selection);
        for (int item = 0; item < streamSize; ++item)
        {
            sampler.sample(item);
        }
        for (int chosen : sampler.samples())
        {
            ++freq[chosen];
        }
    }
    // each item is offered once per cycle and `selection` of `streamSize`
    // survive, so every bucket should approach this level
    float expected
        = (static_cast<float>(cycles)
           * static_cast<float>(selection))
        / static_cast<float>(streamSize);
    // allow +/- 5% (which will still fail now and then..)
    float expectMin = 0.95 * expected;
    float expectMax = 1.05 * expected;
    for (int i = 0; i < streamSize; ++i)
    {
        bool passed = freq[i] > expectMin and freq[i] < expectMax;
        std::cout << std::setw(3) << i << ":" << freq[i]
                  << " " << (passed ? "passed" : "FAILED")
                  << "\n";
    }
    std::cout << "\n";
}
// Entry point: seed the RNG from the clock, then check the sample
// distribution for several reservoir sizes over a 30-item stream.
int main()
{
std::srand(std::time(0));
test(1,30);
test(3,30);
test(7,30);
test(15,30);
}
</pre><br />
<pre>0:3307 passed
1:3302 passed
2:3237 passed
3:3344 passed
4:3308 passed
5:3233 passed
6:3317 passed
7:3257 passed
8:3230 passed
9:3407 passed
10:3310 passed
11:3267 passed
12:3303 passed
13:3414 passed
14:3267 passed
15:3327 passed
16:3333 passed
17:3371 passed
18:3351 passed
19:3364 passed
20:3306 passed
21:3364 passed
22:3402 passed
23:3440 passed
24:3326 passed
25:3390 passed
26:3351 passed
27:3451 passed
28:3289 passed
29:3432 passed
0:10056 passed
1:9997 passed
2:10060 passed
3:10012 passed
4:9919 passed
5:10021 passed
6:9984 passed
7:9945 passed
8:9987 passed
9:10040 passed
10:9919 passed
11:9965 passed
12:9918 passed
13:9904 passed
14:9864 passed
15:9876 passed
16:10043 passed
17:9945 passed
18:10064 passed
19:10025 passed
20:9878 passed
21:10021 passed
22:10108 passed
23:9796 passed
24:10100 passed
25:10025 passed
26:10206 passed
27:10003 passed
28:10172 passed
29:10147 passed
0:23484 passed
1:23126 passed
2:23259 passed
3:23345 passed
4:23290 passed
5:23339 passed
6:23497 passed
7:23348 passed
8:23290 passed
9:23179 passed
10:23603 passed
11:23154 passed
12:23223 passed
13:23333 passed
14:23394 passed
15:23193 passed
16:23546 passed
17:23219 passed
18:23401 passed
19:23201 passed
20:23246 passed
21:23240 passed
22:23621 passed
23:23224 passed
24:23410 passed
25:23376 passed
26:23518 passed
27:23268 passed
28:23348 passed
29:23325 passed
0:49663 passed
1:49911 passed
2:50401 passed
3:49617 passed
4:49691 passed
5:50278 passed
6:49804 passed
7:49940 passed
8:50497 passed
9:49687 passed
10:49582 passed
11:50552 passed
12:49871 passed
13:49670 passed
14:50304 passed
15:50184 passed
16:49988 passed
17:49965 passed
18:50077 passed
19:49945 passed
20:50290 passed
21:50173 passed
22:49972 passed
23:50051 passed
24:49943 passed
25:50053 passed
26:50288 passed
27:49852 passed
28:50009 passed
29:49742 passed
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-14756485919952675482016-07-02T10:54:00.002+09:002016-07-02T11:00:47.795+09:00Google interview question: write an execl col label to integer converter <pre class="brush:cpp">// http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer
// Write a function (with helper functions if needed) called to Excel that takes an excel column value
// (A,B,C,D…AA,AB,AC,… AAA..) and returns a corresponding integer value (A=1,B=2,… AA=26..).
#include <iostream>
#include <stdexcept>
// the function "toExecl" will convert *from* excel format to integers... ignoring that confusion...
// Convert an Excel column label (A, B, ... Z, AA, AB, ...) to its 1-based
// column number (A=1, B=2, ... Z=26, AA=27, ...).
//
// Labels are bijective base-26 numbers: there is no zero digit, hence the
// "+ 1" on every digit ("0A" would equal "A", and "A0" does not exist).
//
// Throws std::runtime_error if any character falls outside A-Z.
int fromExcel(std::string val)
{
    int total = 0;
    for (char digit : val)
    {
        if (digit < 'A' or digit > 'Z')
            throw std::runtime_error("doesnt look like an execl col");
        total = total * 26 + (digit - 'A' + 1);
    }
    return total;
}
// Convert `in` and compare against the expected column number `exp`,
// printing a passed/FAILED line.  Returns 0 on a match, 1 on a mismatch.
// FIX: the original was declared int but had NO return statement (UB).
int test(std::string in, int exp)
{
    int out = fromExcel(in);
    std::cout << (out == exp ? "passed" : "FAILED")
              << " in:" << in
              << " out:" << out
              << " exp:" << exp
              << "\n";
    return out == exp ? 0 : 1;
}
// Entry point: first confirm that a non A-Z input throws, then check a
// spread of hand-computed labels (each letter is one bijective base-26
// digit, so "AA" = 1*26 + 1 and so on).
int main()
{
try
{
test("012", 0);
std::cout << "FAILED: did not detect bad inputs\n";
}
catch (std::exception& e)
{
std::cout << "pass: throw check worked\n";
}
test( "A", 1);
test( "Z", 26);
test( "AA", 1*26 + 1);
test( "AZ", 1*26 + 26);
test( "BA", 2*26 + 1);
test( "ZZ", 26*26 + 26);
test("AAA", 1*26*26 + 1*26 + 1);
test("AZZ", 1*26*26 + 26*26 + 26);
test("ZAA",26*26*26 + 1*26 + 1);
test("ZZZ",26*26*26 + 26*26 + 26);
}
</pre><br />
Output<br />
<br />
<pre>pass: throw check worked
passed in:A out:1 exp:1
passed in:Z out:26 exp:26
passed in:AA out:27 exp:27
passed in:AZ out:52 exp:52
passed in:BA out:53 exp:53
passed in:ZZ out:702 exp:702
passed in:AAA out:703 exp:703
passed in:AZZ out:1378 exp:1378
passed in:ZAA out:17603 exp:17603
passed in:ZZZ out:18278 exp:18278
</pre><br />
Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com2tag:blogger.com,1999:blog-612171808890707553.post-53320811591781845542016-07-02T10:25:00.002+09:002016-07-02T11:01:14.847+09:00Google interview questions: print matching chars in order of first string<br />
<pre class="brush:cpp">// http://www.impactinterview.com/2009/10/140-google-interview-questions/#software_engineer
// Write a function f(a, b) which takes two character string arguments and returns a string containing
// only the characters found in both strings in the order of a. Write a version which is order N-squared
// and one which is order N.
#include <iostream>
#include <string>
#include <functional>
#include <vector>
#include <cstring>
// O(n^2) matcher: for each character of `a` (outer O(n)), linearly scan `b`
// for it (inner O(n)).  A character of `a` is kept -- repeats included, in
// the order of `a` -- whenever it occurs anywhere in `b`.
std::string match_the_stupid_way(std::string a, std::string b)
{
    std::string res;
    for (char c : a)
    {
        // std::string::find is the same linear scan the original spelled
        // out with iterators
        if (b.find(c) != std::string::npos)
            res += c;
    }
    return res;
}
// O(n) matcher: digest `b` into a 256-entry presence table (the O(1)
// structure), then make one pass over `a` keeping -- repeats included, in
// order -- every character that occurred anywhere in `b`.
//
// FIX: the original indexed the table with a plain char, which is signed on
// most platforms, so any byte >= 0x80 produced a NEGATIVE vector index
// (undefined behaviour).  Indices now go through unsigned char, covering
// the full byte range.
std::string match_the_fast_way(std::string a, std::string b)
{
    std::vector<bool> found_char(256, false);
    // first O(n) pass: mark every byte present in b
    for (std::string::iterator bit = b.begin();
         bit != b.end();
         ++bit)
    {
        found_char[static_cast<unsigned char>(*bit)] = true;
    }
    // second O(n) pass: keep the marked characters of a
    std::string res;
    for (std::string::iterator ait = a.begin();
         ait != a.end();
         ++ait)
    {
        if (found_char[static_cast<unsigned char>(*ait)])
            res += *ait;
    }
    return res;
}
// Run one matcher over (a, b) and print a passed/FAILED line comparing the
// result with the expected string `exp`.
void test(std::function<std::string (std::string,std::string)> func,
          std::string a,
          std::string b,
          std::string exp)
{
    const std::string out = func(a, b);
    const bool ok = (out == exp);
    std::cout << (ok ? "passed" : "FAILED")
              << " a:\"" << a
              << "\" b:\"" << b
              << "\" out:\"" << out
              << "\" exp:\"" << exp
              << "\"\n";
}
// Run the fixed inputs through one matcher implementation; the expected
// strings assume repeated characters of `a` are kept (the question leaves
// that ambiguous).
void test_set(std::function<std::string (std::string,std::string)> func)
{
test(func, "a", "cba", "a");
test(func, "aaa", "cba", "aaa"); // hmm question isnt clear on repeats.. assume it does repeats
test(func, "abcdefg", "gfedcba", "abcdefg");
test(func, "the quick brown fox", "peter piper picked a pepper", "te ick r ");
std::cout << "*** SET DONE ***\n";
}
// Entry point: both implementations must print identical results over the
// same test set.
int main()
{
test_set(match_the_stupid_way);
test_set(match_the_fast_way);
}
</pre><br />
Output is <br />
<br />
<pre>passed a:"a" b:"cba" out:"a" exp:"a"
passed a:"aaa" b:"cba" out:"aaa" exp:"aaa"
passed a:"abcdefg" b:"gfedcba" out:"abcdefg" exp:"abcdefg"
passed a:"the quick brown fox" b:"peter piper picked a pepper" out:"te ick r " exp:"te ick r "
*** SET DONE ***
passed a:"a" b:"cba" out:"a" exp:"a"
passed a:"aaa" b:"cba" out:"aaa" exp:"aaa"
passed a:"abcdefg" b:"gfedcba" out:"abcdefg" exp:"abcdefg"
passed a:"the quick brown fox" b:"peter piper picked a pepper" out:"te ick r " exp:"te ick r "
*** SET DONE ***
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-51467252470832364952016-07-02T00:58:00.004+09:002016-07-02T09:29:45.235+09:00google interview question: build a random generatorGoogle interview questions <br />
<br />
<pre class="brush:cpp">// http://www.impactinterview.com/2009/10/140-google-interview-questions/
// Given a function which produces a random integer in the
// range 1 to 5, write a function which produces a random
// integer in the range 1 to 7.
#include <cstring>
#include <cstdlib>
#include <iostream>
#include <functional>
#include <chrono>
// Note stuff the [1,5] thats just +1. -1 everywhere and a waste of time lets go [0,4]
// Pseudo-random integer uniform over [0, 4].  (The question's [1,5] range
// is just an offset; working in [0,4] avoids +1/-1 noise everywhere.)
int rand5()
{
    const int raw = std::rand();
    return raw % 5;
}
// ok so the problem is that you need to combine the rand5 to make rand7
// the question doesnt say anything about *uniform* distribution so in theroy
// u just call rand5 2 times and mod 7.. lets call this the weasel solution
// cause the question was badly written
// Non-uniform "rand7": sum two rand5 draws and wrap mod 7.  The question
// never said *uniform*, so this technically answers it -- but the sum of
// two uniform draws is triangle-shaped and the mod does not flatten it.
int rand7_weasel()
{
    const int draw = rand5() + rand5();
    return draw % 7;
}
// ok so now the questioner has realized his mistake and corrected.
// a "uniform" rand7() generator
//
// The above is not uniform because if use two 6 sided dice and added the rolls
// the most common number is 7. Ie 7 has the most number of combos that add to it.
//
// So lets simplify and assume for the moment we have a more natural problem
// we can generate digits [0,9] uniformly and need to create a uniform number
// from [0,999].. well the answer is much more clear now you generate 3 numbers
// and use them as the digits of the final number
//
// The same idea works for the [0,4], we have 5 numbers so we simply use base 5
// instead of base 10. This way we can make a very large uniform distributed number
//
// once we have a very large range of uniformly distributed numbers we can module
// it by a much smaller number the binning error will be negligible
// lets minimize the binning error by making the max number possible (overflow it once)
//
// 2^32 = 5^N
// 32 log2 = N log5
// N = 13.78
// Near-uniform rand7: treat 14 rand5 draws as the digits of a base-5
// number (14 ~ 32*log(2)/log(5) digits, i.e. the largest count that wraps
// a 32-bit unsigned once), then reduce mod 7.  With such a huge uniform
// range the binning error of the modulus is negligible.
int rand7_overkill()
{
    unsigned int acc = 0;
    for (int digit = 0; digit < 14; ++digit)
        acc = acc * 5 + rand5();
    return acc % 7;
}
// of course if you know your maths the central limit theorem says that for most distributions
// when you take many independent samples and summate them the resulting distribution will
// approach a another stable one.. for uniform distributions thats the normal distribution.
// refer to: http://demonstrations.wolfram.com/CentralLimitTheoremForTheContinuousUniformDistribution/
//
// Then add to this the fact that a small enough modulus on a normal distribution (much
// much less that its variance) will make the resulting distribution look somewhat uniform
// ie the wrapped normal distribution where sigma^2 approaches infinite
// refer to: https://en.wikipedia.org/wiki/Wrapped_normal_distribution
//
// So technically this works.. but a smart interviewer is going to ask you why it works.. and
// your likely to choke on it.. (i know the maths would kill me during an interview)
// Sum 14 rand5 draws -- by the central limit theorem the sum approaches a
// normal distribution -- then wrap mod 7: a modulus much smaller than the
// variance of a normal looks approximately uniform (cf. the wrapped normal
// distribution).  Works, but be ready to justify the maths if asked.
int rand7_central_limit_weasel()
{
    unsigned int acc = 0;
    for (int draw = 0; draw < 14; ++draw)
        acc += rand5();
    return acc % 7;
}
// ok so now we have a working solution.. but guess what your interviewing for google..
// and they want to curve ball it.. so they ask the following.. can you improve the
// performance?.. looping 14 times is too much.. a very "innocent" question
//
// now you might think ok well just reduce the number of iterations it that does it..
// but that is not really going to work well
//
// So here is the trick.. the real question is about testing your knowledge of pseudo
// random generators.. so just build one.. now a pseudo random generator isn't anything
// special but you have to hack one out in an interview and i don't have primes. pi and
// what not memorized by heart...
//
// What i do know is how to generate a uniform distribution that ranges from [0,infinite]..
// so all u have to do is make one big continuous rolling uniform number from the above code
// and then sample from it as needed..
//
// now this does raise some questions about the independence of the samples... but its faster...
// Faster variant: keep one rolling base-5 accumulator across calls -- a
// tiny pseudo-random generator seeded from a real rand5 draw -- and sample
// mod 7 from it.  Much cheaper than rebuilding 14 digits per call, at some
// cost to the independence of successive samples.
int rand7_pseudo()
{
    // seeded once, on the first call
    static unsigned int state = rand5();
    // roll one more base-5 digit into the running state
    state = state * 5 + rand5();
    return state % 7;
}
// Histogram 100000 draws from `randy` (which must return values in [0,6])
// and print each bucket's count plus the elapsed time.
//
// FIX: the original memset() an array that brace-initialization had
// already zeroed -- redundant work removed.
void test(std::function<int()> randy)
{
    int freq[7] = {0};  // zero-initialized; no memset needed
    auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < 100000; ++i)
    {
        ++freq[randy()];
    }
    auto finish = std::chrono::steady_clock::now();
    double elapsed_seconds = std::chrono::duration_cast<
        std::chrono::duration<double> >(finish - start).count();
    for (int i=0;i<7;++i)
    {
        std::cout << "\n " << i << ":" << freq[i];
    }
    std::cout << "\n speed:" << elapsed_seconds
              << "\n";
}
// Entry point: histogram each generator.  rand7_weasel's buckets visibly
// cluster toward the middle values; the other three come out flat.
int main()
{
test(rand7_weasel);
test(rand7_overkill);
test(rand7_central_limit_weasel);
test(rand7_pseudo);
}
</pre><br />
and output looks like this:<br />
<br />
<pre> 0:11949
1:12073
2:11885
3:16060
4:19993
5:16024
6:12016
speed:0.0190048
0:14130
1:14541
2:14177
3:14296
4:14157
5:14400
6:14299
speed:0.056561
0:14264
1:14538
2:14425
3:14064
4:14476
5:14076
6:14157
speed:0.0409873
0:14272
1:14230
2:14308
3:14110
4:14214
5:14422
6:14444
speed:0.0131068
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-36546364497737555132016-06-07T21:16:00.000+09:002016-06-07T21:30:23.627+09:00c++ like template AutoGrad - How to compute the Derivative Of a function using templatesYou may have heard of a system called AutoGrad. It is a system that when given an equation computes the derivate of it automatically. Here is an example of how to build the same system using c++ templates.<br />
<br />
I was inspired to write this after I realized I messed up the differentiation for the neural network's back-prop path in my last post. <br />
<br />
Clearly you see where I'm headed with this.. a few more mods to make it work with tensors and I'll have a full autograd system for C++ that can automatically compute the backprop of a neural network.. and it will also compile in C4droid.. and then toss in some Thrust code and it will be CUDA capable, able to use PCs with GPUs<br />
<br />
from smartphone to GPU in a few short steps...<br />
<br />
Anyway here is the proof of concept code:<br />
<br />
output looks like<br />
<pre>x:2 m:3 c:4 p:6 y:10 z:16 q:100
dm/dx:0 dx/dx:1 dp/dx:1 dy/dx:3 dz/dx:12 dq/dx:60
</pre><br />
<pre class="brush:cpp">// compile with "g++ std=c++11 ..."
// or in c4driod on an andriod phone..
#include <tuple>
#include <iostream>
#include <cmath>
// Leaf expression node: a compile-time integer constant.  op() ignores the
// parameter tuple entirely and evaluates to VALUE.
template <int VALUE>
struct Const
{
    template <typename... Args>
    static double op(std::tuple<Args...>& args)
    {
        return static_cast<double>(VALUE);
    }
};
// Leaf expression node: a variable, evaluated by reading the ID-th entry of
// the parameter tuple.
template <int ID>
struct Var
{
    template <typename... Args>
    static double op(std::tuple<Args...>& args)
    {
        return std::get<ID>(args);
    }
};
// Composite expression node: the sum of two sub-expressions.
template <typename L, typename R>
struct OpAdd
{
    template <typename... Args>
    static double op(std::tuple<Args...>& args)
    {
        const double lhs = L::op(args);
        const double rhs = R::op(args);
        return lhs + rhs;
    }
};
// Composite expression node: the product of two sub-expressions.
template <typename L, typename R>
struct OpMult
{
    template <typename... Args>
    static double op(std::tuple<Args...>& args)
    {
        const double lhs = L::op(args);
        const double rhs = R::op(args);
        return lhs * rhs;
    }
};
// Composite expression node: a sub-expression raised to a constant integer
// power (delegates the arithmetic to std::pow).
template <typename L, int POW>
struct OpPower
{
    template <typename... Args>
    static double op(std::tuple<Args...>& args)
    {
        const double base = L::op(args);
        return std::pow(base, POW);
    }
};
// Compile-time symbolic differentiation: DerivativeOf<Expr, Var>::Type is a
// new expression type representing d(Expr)/d(Var).  Each differentiation
// rule below is a partial specialization; the compiler picks the MOST
// specialized match, which is why Var<ID>/Var<ID> beats Var<N>/Var<D> when
// the IDs coincide.  The unspecialized primary template is intentionally
// empty so an unsupported combination fails to compile.
template <typename Num, typename Denum>
struct DerivativeOf {};
// d(c)/dx = 0 for any constant c
template <int V, int ID_D>
struct DerivativeOf<Const<V>, Var<ID_D> >
{
typedef Const<0> Type;
};
// d(y)/dx = 0 for a variable other than x
template <int ID_N, int ID_D>
struct DerivativeOf<Var<ID_N>, Var<ID_D> >
{
typedef Const<0> Type;
};
// d(x)/dx = 1 (more specialized than the rule above, so it wins when the
// variable IDs match)
template <int ID_N>
struct DerivativeOf<Var<ID_N>, Var<ID_N> >
{
typedef Const<1> Type;
};
// sum rule: d(l + r)/dx = dl/dx + dr/dx
template <typename L, typename R, typename D>
struct DerivativeOf<OpAdd<L,R>, D >
{
typedef OpAdd<typename DerivativeOf<L,D>::Type,
typename DerivativeOf<R,D>::Type> Type;
};
// product rule: d(l * r)/dx = (dl/dx)*r + l*(dr/dx)
template <typename L, typename R, typename D>
struct DerivativeOf<OpMult<L,R>, D >
{
typedef OpAdd<OpMult<typename DerivativeOf<L, D>::Type, R>,
OpMult<L, typename DerivativeOf<R, D>::Type> > Type;
};
// power rule, differentiating directly by the base expression:
// d(l^p)/dl = p * l^(p-1)
template <typename L, int POW>
struct DerivativeOf<OpPower<L,POW>, L >
{
typedef OpMult<Const<POW>, OpPower<L, POW-1> > Type;
};
// power rule with the chain rule for a general sub-expression:
// d(l^p)/dx = p * l^(p-1) * dl/dx
template <typename L, int POW, typename D>
struct DerivativeOf<OpPower<L,POW>, D >
{
typedef OpMult<OpMult<Const<POW>, OpPower<L, POW-1> >,
typename DerivativeOf<L, D>::Type> Type;
};
// Entry point: build a handful of expression types, differentiate them with
// respect to x entirely at compile time, then evaluate everything at
// (x, m, c) = (2, 3, 4) and print the results.
int main()
{
typedef Var<0> X;
typedef Var<1> M;
typedef Var<2> C;
typedef OpAdd<X,C> P; // p = x + c
typedef OpAdd<OpMult<M,X>,C> Y; // y = m*x + c
typedef OpAdd<OpMult<M,OpPower<X,2>>,C> Z; // z = m*x^2 + c
typedef OpPower<OpAdd<OpMult<M,X>,C>,2> Q; // q = y^2 = (m*x + c)^2
typedef DerivativeOf<M, X>::Type dM_dX; // dm/dx = 0
typedef DerivativeOf<X, X>::Type dX_dX; // dx/dx = 1
typedef DerivativeOf<P, X>::Type dP_dX; // dp/dx = 1
typedef DerivativeOf<Y, X>::Type dY_dX; // dy/dx = m
typedef DerivativeOf<Z, X>::Type dZ_dX; // dz/dx = 2*m*x (printed 12 = 2*3*2)
typedef DerivativeOf<Q, X>::Type dQ_dX; // dQ/dx = dq/dy * dy/dx = 2*(m*x + c)*m
X x;
M m;
C c;
P p;
Y y;
Z z;
Q q;
dX_dX dx_dx;
dM_dX dm_dx;
dP_dX dp_dx;
dY_dX dy_dx;
dZ_dX dz_dx;
dQ_dX dq_dx;
std::tuple<double,double,double> params = std::make_tuple(2.0,3.0,4.0);
std::cout << " x:" << x.op(params)
<< " m:" << m.op(params)
<< " c:" << c.op(params)
<< " p:" << p.op(params)
<< " y:" << y.op(params)
<< " z:" << z.op(params)
<< " q:" << q.op(params)
<< "\n";
std::cout << " dm/dx:" << dm_dx.op(params)
<< " dx/dx:" << dx_dx.op(params)
<< " dp/dx:" << dp_dx.op(params)
<< " dy/dx:" << dy_dx.op(params)
<< " dz/dx:" << dz_dx.op(params)
<< " dq/dx:" << dq_dx.op(params)
<< "\n";
}
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-62207207513129463532016-05-05T23:51:00.001+09:002016-06-07T21:28:12.150+09:00libraryless c++ neural netsUPDATE: DONT USE THIS<br />
<br />
I have noticed that I messed up the backprop.. it seems matrices are not my strong point and do/dw is a killer.. even the wiki page on it gives up https://en.wikipedia.org/wiki/Matrix_calculus#Layout_conventions ... check the "Result of differentiating various kinds of aggregates with other kinds of aggregates" table — it has "??" in the vector y to matrix X column<br />
<br />
<hr/><br />
I like every other com-sci guy on the planet is studying Machine learning... But to do this tools are needed..<br />
<br />
Libraries, toolkits and frameworks with their sundry list of requirements have always annoyed the hell out of me. These things are never cost-less.. We constantly have burn time to setup a system that is compatible with the megabytes of dependencies that these things require to be installed, then deal with the portability issues that stop them from installing and then over the long term maintain and upgrade them and deal with package drift between the various components to keep it all running and then hope that the package maintainers dont quit updating the entire mess. <br />
<br />
Library free raw language implementation have some big plus going for them:<br />
* Firstly any of todays smartphones have enough processing power to do the job of training and working with neural networks.. but the tool chain makes it next to impossible actually do the job.. <br />
* Secondly companies are often paranoid about what you can install in workstations.<br />
<br />
So in short I have tossed out the ML tool chain and give a shoot at doing it in plain simple c++. <br />
<br />
ML at its core is just matrix math, so I have hacked together a Matrix class that can be compiled with C4droid, g++, and clang. Then, using this Matrix class, I built a simple xor model and trained it using hand-worked-out back prop. This is proof of concept that with less than 1000 lines of normal c++ code and a standard c++ compiler you're good to go for ML projects. <br />
<br />
The whole mess is 728 lines of c++11 code (it needs a fair bit clean up but hey whatever)<br />
<br />
Heres the end result of the build and run on my smartphone (a nexus 5x using c4driod)<br />
<br />
<div class="separator" style="clear: both; text-align: center;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiLu5VEo8EB8345vDuB990o-IRqp5-8nfwDHNgrvVaoyc3XKnlmTqkZKceJFyQGnZlRnoxM0uyGzAHmp6UFlxSlMLldfxGYRStAm1ZF_xX4d2ot5WBUS3wa9Q3pPEVWWZzVvYo4xdwByoY/s1600/Picture.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiLu5VEo8EB8345vDuB990o-IRqp5-8nfwDHNgrvVaoyc3XKnlmTqkZKceJFyQGnZlRnoxM0uyGzAHmp6UFlxSlMLldfxGYRStAm1ZF_xX4d2ot5WBUS3wa9Q3pPEVWWZzVvYo4xdwByoY/s400/Picture.png" /></a></div><br />
<br />
And the code:<br />
<pre class="brush:cpp">#include <iostream>
#include <vector>
#include <functional>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <cmath>
#include <cstdlib>
// ****************************************************************
// *************************** MATRIX *****************************
// ****************************************************************
// Minimal dense 2-D matrix.
// Storage is a shared_ptr<vector>, so copies are SHALLOW: copying a
// Matrix aliases the same element buffer (only shape_ is copied by
// value).  Layout is row-major: at(x,y) -> (*data_)[y*cols + x], with
// shape_[0] = column count and shape_[1] = row count.
template <typename Type>
struct Matrix
{
//private:
// flat row-major element buffer, shared between shallow copies
typedef std::vector<Type> Data;
std::shared_ptr<Data> data_;
// {cols, rows}
std::vector<std::size_t> shape_;
// friend class MatrixUtils<Type>;
//public:
// Empty matrix: data_ stays null, so at() must not be called until a
// real matrix has been assigned over it (see Model::Network members).
Matrix()
{}
// cols x rows matrix, elements value-initialized (zero for numerics).
Matrix(std::size_t cols,
std::size_t rows)
{
data_.reset(new Data);
data_->resize(cols*rows);
shape_.push_back(cols);
shape_.push_back(rows);
}
// Build from a fixed-size C array; N should equal cols*rows (not checked).
template <std::size_t N>
Matrix(Type (&raw)[N],
std::size_t cols,
std::size_t rows)
{
data_.reset(new Data);
data_->resize(N);
data_->assign(raw,raw+N);
shape_.push_back(cols);
shape_.push_back(rows);
}
// Build from the range [begin,end); its length should be cols*rows
// (not checked).
Matrix(Type* begin, Type* end,
std::size_t cols,
std::size_t rows)
{
data_.reset(new Data);
data_->resize(cols*rows);
data_->assign(begin,end);
shape_.push_back(cols);
shape_.push_back(rows);
}
// Aliasing constructor: adopt an existing shared buffer without copying.
Matrix(std::shared_ptr<Data> data,
std::size_t cols,
std::size_t rows) :
data_(data)
{
shape_.push_back(cols);
shape_.push_back(rows);
}
// Mutable element access; x is the column, y the row (not bounds-checked).
Type& at(std::size_t x, std::size_t y)
{
return (*data_)[y*shape_[0] + x];
}
// Read-only element access; x is the column, y the row (not bounds-checked).
Type at(std::size_t x, std::size_t y) const
{
return (*data_)[y*shape_[0] + x];
}
};
// ****************************************************************
// ************************ MATRIX UTILS **************************
// ****************************************************************
template <typename Type>
struct MatrixUtils
{
static void print(std::ostream& os, const Matrix<Type>& a)
{
for (std::size_t y = 0; y < a.shape_[1]; ++y)
{
for (std::size_t x = 0; x < a.shape_[0]; ++x)
{
os << a.at(x,y) << " ";
}
os << "\n";
}
}
static Matrix<Type> matmul(const Matrix<Type>& a,
const Matrix<Type>& b)
{
if (a.shape_[0] != b.shape_[1])
{
std::stringstream ss;
ss << "Matrix shapes wrong for matmul"
<< " a: " << a.shape_[0] << "x" << a.shape_[1]
<< " b: " << b.shape_[0] << "x" << b.shape_[1];
throw std::runtime_error(ss.str());
}
Matrix<Type> res(b.shape_[0], a.shape_[1]);
for (std::size_t y = 0; y < res.shape_[1]; ++y)
{
for (std::size_t x = 0; x < res.shape_[0]; ++x)
{
Type sum = 0;
for (std::size_t i = 0; i < a.shape_[0]; ++i)
{
sum += a.at(i,y) * b.at(x,i);
}
res.at(x,y) = sum;
}
}
return res;
}
static Matrix<Type> selrow(std::size_t row,
const Matrix<Type>& a)
{
// TODO reimpl as an iterator!
Matrix<Type> r(a.shape_[0],1);
for (std::size_t x = 0; x < a.shape_[0]; ++x)
{
r.at(x,0) = a.at(x,row);
}
return r;
}
static Matrix<Type> selcol(std::size_t col,
const Matrix<Type>& a)
{
Matrix<Type> r(1,a.shape_[1]);
for (std::size_t y = 0; y < a.shape_[1]; ++y)
{
r.at(0,y) = a.at(col,y);
}
return r;
}
static Matrix<Type> transpose(const Matrix<Type>& a)
{
Matrix<Type> r(a.shape_[1],a.shape_[0]);
for (std::size_t y = 0; y < a.shape_[1]; ++y)
{
for (std::size_t x = 0; x < a.shape_[0]; ++x)
{
r.at(y,x) = a.at(x,y);
}
}
return r;
}
static void unifunctor_inplace(std::function<Type (Type)> func,
Matrix<Type>& a)
{
typedef typename Matrix<Type>::Data::iterator iterator;
// TODO this feels llike there should be an std:algo for it
// maybe generate ??
for(iterator it = a.data_->begin();
it != a.data_->end();
++it)
{
*it = func(*it);
}
}
static Matrix<Type> unifunctor(std::function<Type (Type)> func,
const Matrix<Type>& a)
{
typedef typename Matrix<Type>::Data::iterator iterator;
// TODO this feels llike there should be an std:algo for it
// maybe generate ??
Matrix<Type> r(a.shape_[0],a.shape_[1]);
iterator ait = a.data_->begin();
for(iterator rit = r.data_->begin();
rit != r.data_->end();
++rit)
{
*rit = func(*ait);
++ait;
}
return r;
}
static Matrix<Type> bifunctor(std::function<Type (Type,Type)> func,
const Matrix<Type>& a,
const Matrix<Type>& b)
{
typedef typename Matrix<Type>::Data::iterator iterator;
if (a.shape_[0] != b.shape_[0] or
a.shape_[1] != b.shape_[1])
{
std::stringstream ss;
ss << "Matrix shapes mismatch for bifunctor"
<< " a: " << a.shape_[0] << "x" << a.shape_[1]
<< " b: " << b.shape_[0] << "x" << b.shape_[1];
throw std::runtime_error(ss.str());
}
// TODO this feels llike there should be an std:algo for it
// maybe generate ??
Matrix<Type> r(a.shape_[0], a.shape_[1]);
iterator ait = a.data_->begin();
iterator bit = b.data_->begin();
iterator rit = r.data_->begin();
while (rit != r.data_->end())
{
*rit = func(*ait, *bit);
++ait;
++bit;
++rit;
}
return r;
}
static void bifunctor_inplace(std::function<Type (Type,Type)> func,
Matrix<Type>& a,
const Matrix<Type>& b)
{
typedef typename Matrix<Type>::Data::iterator iterator;
if (a.shape_[0] != b.shape_[0] or
a.shape_[1] != b.shape_[1])
{
std::stringstream ss;
ss << "Matrix shapes mismatch for bifunctor"
<< " a: " << a.shape_[0] << "x" << a.shape_[1]
<< " b: " << b.shape_[0] << "x" << b.shape_[1];
throw std::runtime_error(ss.str());
}
// TODO this feels llike there should be an std:algo for it
// maybe generate ??
Matrix<Type> r(a.shape_[0], a.shape_[1]);
iterator bit = b.data_->begin();
for(iterator ait = a.data_->begin();
ait != a.data_->end();
++ait)
{
*ait = func(*ait, *bit);
++bit;
}
}
static Matrix<Type> bifunctor_row(std::function<Type (Type,Type)> func,
const Matrix<Type>& a,
const Matrix<Type>& b)
{
typedef typename Matrix<Type>::Data::iterator iterator;
if (a.shape_[0] != b.shape_[0] or
b.shape_[1] != 1)
{
std::stringstream ss;
ss << "Matrix shapes mismatch for bifunctor_row"
<< " a: " << a.shape_[0] << "x" << a.shape_[1]
<< " b: " << b.shape_[0] << "x" << b.shape_[1];
throw std::runtime_error(ss.str());
}
// TODO this feels llike there should be an std:algo for it
// maybe generate ??
Matrix<Type> r(a.shape_[0], a.shape_[1]);
iterator ait = a.data_->begin();
iterator bit = b.data_->begin();
iterator rit = r.data_->begin();
while (rit != r.data_->end())
{
if (bit == b.data_->end()) bit = b.data_->begin();
*rit = func(*ait, *bit);
++ait;
++bit;
++rit;
}
return r;
}
static Matrix<Type> bifunctor_scaler(std::function<Type (Type,Type)> func,
const Type a,
const Matrix<Type>& b)
{
typedef typename Matrix<Type>::Data::iterator iterator;
// TODO this feels llike there should be an std:algo for it
// maybe generate ??
Matrix<Type> r(b.shape_[0], b.shape_[1]);
iterator bit = b.data_->begin();
iterator rit = r.data_->begin();
while (rit != r.data_->end())
{
*rit = func(a,*bit);
++bit;
++rit;
}
return r;
}
static Matrix<Type> bifunctor_scaler(std::function<Type (Type,Type)> func,
const Matrix<Type>& a,
const Type b)
{
typedef typename Matrix<Type>::Data::iterator iterator;
// TODO this feels llike there should be an std:algo for it
// maybe generate ??
Matrix<Type> r(a.shape_[0], a.shape_[1]);
iterator ait = a.data_->begin();
iterator rit = r.data_->begin();
while (rit != r.data_->end())
{
*rit = func(*ait,b);
++ait;
++rit;
}
return r;
}
struct Helpers
{
static Type add(Type a, Type b) { return a+b; }
static Type sub(Type a, Type b) { return a-b; }
static Type mul(Type a, Type b) { return a*b; }
static Type div(Type a, Type b) { return a/b; }
static Type relu(Type a) { return (a < 0) ? 0 : a; }
static Type tanh(Type a) { return std::tanh(a); }
static Type rand(Type a) { return static_cast<Type>(std::rand() % 100)/100.0 - 0.5; }
static Type ones(Type a) { return 1.0; }
static Type xor_f(Type a, Type b) { return (a*b < 0) ? -1.0 : 1.0; }
};
};
// ****************************************************************
// ************************ MATRIX OPERATORS **********************
// ****************************************************************
// Stream a matrix via MatrixUtils::print (one row per line).
template <typename Type>
std::ostream& operator<<(std::ostream& os, const Matrix<Type>& a)
{
MatrixUtils<Type>::print(os, a);
return os;
}
// Element-wise addition; shapes must match (throws otherwise).
template <typename Type>
Matrix<Type> operator+(const Matrix<Type>& a,
const Matrix<Type>& b)
{
return MatrixUtils<Type>::bifunctor(&MatrixUtils<Type>::Helpers::add,a,b);
}
// Element-wise subtraction; shapes must match (throws otherwise).
template <typename Type>
Matrix<Type> operator-(const Matrix<Type>& a,
const Matrix<Type>& b)
{
return MatrixUtils<Type>::bifunctor(&MatrixUtils<Type>::Helpers::sub,a,b);
}
// Matrix multiplication (NOT element-wise; see product() for Hadamard).
template <typename Type>
Matrix<Type> operator*(const Matrix<Type>& a,
const Matrix<Type>& b)
{
return MatrixUtils<Type>::matmul(a,b);
}
// Scale every element of b by the scalar a.
template <typename Type>
Matrix<Type> operator*(Type a,
const Matrix<Type>& b)
{
return MatrixUtils<Type>::bifunctor_scaler(&MatrixUtils<Type>::Helpers::mul,a,b);
}
// Scale every element of a by the scalar b.
template <typename Type>
Matrix<Type> operator*(const Matrix<Type>& a,
Type b)
{
return MatrixUtils<Type>::bifunctor_scaler(&MatrixUtils<Type>::Helpers::mul,a,b);
}
// Element-wise division; shapes must match (throws otherwise).
template <typename Type>
Matrix<Type> operator/(const Matrix<Type>& a,
const Matrix<Type>& b)
{
return MatrixUtils<Type>::bifunctor(&MatrixUtils<Type>::Helpers::div,a,b);
}
// In-place element-wise addition; shapes must match (throws otherwise).
// FIX: return a reference to the updated matrix -- the old version
// returned Matrix<Type> by value, which made an unnecessary copy on
// every update and broke the usual compound-assignment contract.
template <typename Type>
Matrix<Type>& operator+=(Matrix<Type>& a,
                         const Matrix<Type>& b)
{
    MatrixUtils<Type>::bifunctor_inplace(&MatrixUtils<Type>::Helpers::add,a,b);
    return a;
}
// Element-wise (Hadamard) product; shapes must match (throws otherwise).
template <typename Type>
Matrix<Type> product(const Matrix<Type>& a,
const Matrix<Type>& b)
{
return MatrixUtils<Type>::bifunctor(&MatrixUtils<Type>::Helpers::mul,a,b);
}
// Add the single-row matrix b to every row of a (row broadcast).
template <typename Type>
Matrix<Type> rowadd(const Matrix<Type>& a,
const Matrix<Type>& b)
{
return MatrixUtils<Type>::bifunctor_row(&MatrixUtils<Type>::Helpers::add,a,b);
}
// Transposed copy of a.
template <typename Type>
Matrix<Type> transpose(const Matrix<Type>& a)
{
return MatrixUtils<Type>::transpose(a);
}
// Element-wise tanh of a.
template <typename Type>
Matrix<Type> tanh(const Matrix<Type>& a)
{
return MatrixUtils<Type>::unifunctor(&MatrixUtils<Type>::Helpers::tanh,a);
}
// Element-wise derivative of tanh evaluated at a.
template <typename Type>
Matrix<Type> tanh_derivate(const Matrix<Type>& a)
{
// dtanh/dx = 1 - (tanh(x)) ^ 2
Matrix<Type> th = tanh(a);
return MatrixUtils<Type>::bifunctor_scaler(&MatrixUtils<Type>::Helpers::sub, 1, product(th,th));
}
// Fill a, in place, with uniform noise in [-0.5, 0.5) (uses std::rand).
template <typename Type>
void rand(Matrix<Type>& a)
{
MatrixUtils<Type>::unifunctor_inplace(&MatrixUtils<Type>::Helpers::rand,a);
}
// Fill a, in place, with 1s.
template <typename Type>
void ones(Matrix<Type>& a)
{
MatrixUtils<Type>::unifunctor_inplace(&MatrixUtils<Type>::Helpers::ones,a);
}
// ****************************************************************
// ****************************************************************
// ****************************************************************
// Fixed-topology feed-forward net for the sign-xor problem:
// 2 inputs -> 4 hidden -> 2 hidden -> 1 output, tanh activation
// throughout, trained with hand-derived backprop (full-batch gradient
// descent).  Samples are rows: inputs is (2 cols x batch rows).
struct Model
{
// input 2
// hidden1 4
// hidden2 2
// output 1
// function tanh
// learn rate: 0.03
// batch size 10
// xor data [-1, 1]
// converges in less than 140 iterations
typedef float Type;
// Per-pass activations (o* = pre-activation, h* = tanh(o*)).
// Members are default-constructed EMPTY matrices until forward()
// assigns them -- do not read them before a forward pass.
struct Network
{
Matrix<Type> x_;
Matrix<Type> o1_;
Matrix<Type> h1_;
Matrix<Type> o2_;
Matrix<Type> h2_;
Matrix<Type> o3_;
Matrix<Type> y_ ;
};
// Learned weights W*(cols,rows) and single-row biases B*.
// NOTE(review): these say Matrix<float> rather than Matrix<Type>;
// identical today, but they should track the typedef.
struct Params
{
Matrix<float> Wh1_;
Matrix<float> Bh1_;
Matrix<float> Wh2_;
Matrix<float> Bh2_;
Matrix<float> Wo_;
Matrix<float> Bo_;
Params() :
Wh1_(4,2),
Bh1_(4,1),
Wh2_(2,4),
Bh2_(2,1),
Wo_ (1,2),
Bo_ (1,1)
{
// init weights with uniform noise in [-0.5, 0.5)
rand(Wh1_);
rand(Bh1_);
rand(Wh2_);
rand(Bh2_);
rand(Wo_ );
rand(Bo_ );
}
};
Params p_;
// init weights
Model() :
p_()
{}
// Run the whole batch `inputs` (one sample per row) through the net,
// filling every activation in `net`.
void forward(Network& net, const Matrix<Type>& inputs)
{
// forward network params
// h1 = tanh(Wh1*x + Bh1)
// h2 = tanh(Wh2*h1 + Bh2)
// y = tanh(Wo *h2 + Bo )
net.x_ = inputs;
net.o1_ = rowadd(net.x_ * p_.Wh1_, p_.Bh1_); net.h1_ = tanh(net.o1_);
net.o2_ = rowadd(net.h1_ * p_.Wh2_, p_.Bh2_); net.h2_ = tanh(net.o2_);
net.o3_ = rowadd(net.h2_ * p_.Wo_ , p_.Bo_ ); net.y_ = tanh(net.o3_);
}
// Per-sample squared error (diagnostic only -- see comment below).
Matrix<Type> loss(const Matrix<Type>& expected,
const Network& net)
{
// you dont have to run this unless your printing stuff.. the back prop needs the "dE/dy" not the "E"
Matrix<Type> delta = net.y_ - expected;
return product(delta, delta);
}
// Hand-derived backprop: compute the gradients from the activations
// stored in `net` and apply one gradient-descent step scaled by alpha.
void backward(const Matrix<Type>& expected,
const Network& net,
const Type alpha)
{
// note tanh(x) = (exp(x)-exp(-x))/(exp(x)+exp(-x))
// dtanh/dx = 1 - (tanh(x)) ^ 2
// forward network params
// o1 = Wh1*x + Bh1 ; h1 = tanh(o1)
// o2 = Wh2*h1 + Bh2 ; h2 = tanh(o2)
// o3 = Wo *h2 + Bo ; y = tanh(o3)
// E = (e - y)^2
// dE/dy = 2 * (y - e)
// =~ (y - e) // toss the 2 out as alpha will take it
// sigma3 = dE/do3
// = dE/dy * dy/do3
// = (y - e) * (1 - (tanh(o3))^2)
// sigma2 = dE/do2
// = dE/dy * dy/do3 * do3/dh2 * dh2/do2
// = sigma3 * do3/dh2 * dh2/do2
// = sigma3 * Wo * (1 - (tanh(o2))^2)
// sigma1 = dE/do1
// = dE/dy * dy/do3 * do3/dh2 * dh2/do2 * do2/dh1 * dh1/do1
// = sigma2 * do2/dh1 * dh1/do1
// = sigma2 * Wh2 * (1 - (tanh(o1))^2)
// dWo = -alpha * dE/dWo
// = -alpha * dE/dy * dy/do3 * do3/dWo
// = -alpha * sigma3 * do3/dWo
// = -alpha * sigma3 * h2
// dWh2 = -alpha * dE/dWh2
// = -alpha * dE/dy * dy/do3 * do3/dh2 * dh2/do2 * do2/dWh2
// = -alpha * sigma2 * do2/dWh2
// = -alpha * sigma2 * h1
// dWh1 = -alpha * dE/dWh1
// = -alpha * dE/dy * dy/do3 * do3/dh2 * dh2/do2 * do2/dh1 * dh1/do1 * do1/dWh1
// = -alpha * sigma1 * do1/dWh1
// = -alpha * sigma1 * x
// dBo = -alpha * dE/dBo
// = -alpha * dE/dy * dy/do3 * do3/dBo
// = -alpha * sigma3 * do3/dBo
// = -alpha * sigma3 * 1
// dBh2 = -alpha * dE/dBh2
// = -alpha * dE/dy * dy/do3 * do3/dh2 * dh2/do2 * do2/dBh2
// = -alpha * sigma2 * do2/dBh2
// = -alpha * sigma2 * 1
// dBh1 = -alpha * dE/dBh1
// = -alpha * dE/dy * dy/do3 * do3/dh2 * dh2/do2 * do2/dh1 * dh1/do1 * do1/dBh1
// = -alpha * sigma1 * do1/dBh1
// = -alpha * sigma1 * 1
Matrix<Type> sigma3 = product((net.y_ - expected) , tanh_derivate(net.o3_));
Matrix<Type> sigma2 = product(sigma3 * transpose(p_.Wo_ ), tanh_derivate(net.o2_));
Matrix<Type> sigma1 = product(sigma2 * transpose(p_.Wh2_), tanh_derivate(net.o1_));
Matrix<Type> dWo = -alpha * (transpose(net.h2_) * sigma3);
Matrix<Type> dWh2 = -alpha * (transpose(net.h1_) * sigma2);
Matrix<Type> dWh1 = -alpha * (transpose(net.x_ ) * sigma1);
// single row of ones: left-multiplying a sigma by it sums each of
// its columns over the batch, giving the bias gradients
Matrix<Type> transposed_unit(sigma3.shape_[1], 1);
ones(transposed_unit);
Matrix<Type> dBo = -alpha * (transposed_unit * sigma3);
Matrix<Type> dBh2 = -alpha * (transposed_unit * sigma2);
Matrix<Type> dBh1 = -alpha * (transposed_unit * sigma1);
p_.Wo_ += dWo ;
p_.Wh2_ += dWh2;
p_.Wh1_ += dWh1;
p_.Bo_ += dBo ;
p_.Bh2_ += dBh2;
p_.Bh1_ += dBh1;
}
// Full-batch gradient descent for `cycles` iterations, logging the
// loss and all parameters every 10 steps.
void train(const Matrix<Type>& inputs,
const Matrix<Type>& expected,
const Type alpha,
const int cycles)
{
//gradient descent .. the hard way?
for (int i = 0; i < cycles; ++i)
{
// batching ??
Network net;
forward(net, inputs);
backward(expected, net, alpha);
if ((i % 10) == 0)
{
Matrix<Type> err = loss(expected,net);
// sum the per-sample errors into a single net error
Matrix<Type> transposed_unit(err.shape_[1], 1);
ones(transposed_unit);
Matrix<Type> netErr = transposed_unit * err;
std::cout << "**********************************************\n";
std::cout << "step: " << i << " err:" << netErr;
std::cout << " Wo_ :\n" << p_.Wo_ ;
std::cout << " Wh2_:\n" << p_.Wh2_;
std::cout << " Wh1_:\n" << p_.Wh1_;
std::cout << " Bo_ :\n" << p_.Bo_ ;
std::cout << " Bh2_:\n" << p_.Bh2_;
std::cout << " Bh1_:\n" << p_.Bh1_;
}
}
}
};
void basicTest()
{
int rawA[] = {1,2,3,4};
int rawB[] = {2,3,4,5};
int rawC[] = {2,3,4,5,6,7};
Matrix<int> a(rawA, 2, 2);
Matrix<int> b(rawB, 2, 2);
Matrix<int> c(rawC, 3, 2);
std::cout << a << "\n";
std::cout << b << "\n";
std::cout << c << "\n";
std::cout << (a + b) << "\n";
try { std::cout << (a + c) << "\n"; }
catch (std::exception& e) { std::cout << e.what() << "\n"; }
std::cout << MatrixUtils<int>::matmul(a,c) << "\n";
try { std::cout << MatrixUtils<int>::matmul(c,a) << "\n"; }
catch (std::exception& e) { std::cout << e.what() << "\n"; }
std::cout << "SUCCESS\n";
}
void xorNetTest()
{
// generate some train data
Matrix<float> inputs(2,100);
rand(inputs);
// compute xor for input data
Matrix<float> expected
= MatrixUtils<float>::bifunctor(&MatrixUtils<float>::Helpers::xor_f,
MatrixUtils<float>::selcol(0, inputs),
MatrixUtils<float>::selcol(1, inputs));
Model model;
model.train(inputs,expected,0.03,10001);
}
// Run the matrix smoke test, then train the xor network.
int main()
{
basicTest();
xorNetTest();
}
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-89661951218100372292015-12-13T18:22:00.000+09:002015-12-13T18:22:11.373+09:00Variadic templated template parameters for a loop unroller/code repeaterThis is basically a demonstration of how to use a variadic templated template parameter in combination with a variadic template outer handler that locks the typing of the inner part into place with automatic type resolution. This application is basically demonstrating how to create a static loop unroller or code repeater... <br />
<br />
<pre class="brush:cpp">// compile: g++ -std=c++11 metafunctor.cpp
#include <iostream>
#include <map>
// **************************************************************
// **********************TEMPLATE FOR****************************
// **************************************************************
// Compile-time "for loop": ForFunctor<Func, IDs...>::with(args...)
// expands to Func<ID>::op(args...) for each ID, in the listed order,
// fully unrolled at compile time.
template <template<int> class Func, int... IDs> class ForFunctor;
// Recursive case: apply Func<ID>, then recurse on the remaining IDs.
template <template<int> class Func, int ID, int... IDs>
class ForFunctor<Func,ID,IDs...>
{
public:
// Params are taken by lvalue reference so Func<ID>::op can mutate them.
template <typename... Params>
static void with(Params& ...params)
{
Func<ID>::op(params...);
ForFunctor<Func,IDs...>::with(params...);
}
};
// Base case: no IDs left, nothing to do.
template <template<int> class Func>
class ForFunctor<Func>
{
public:
template <typename... Params>
static void with(Params& ...params) {}
};
// **************************************************************
// **********************SOME FUNCTORS****************************
// **************************************************************
// ForFunctor functor: stores the id into the container, out[ID] = ID.
template <int ID>
class Init
{
public:
template <typename OUT>
static void op(OUT& out)
{
out[ID] = ID;
}
};
// ForFunctor functor: copies one entry between containers, out[ID] = in[ID].
template <int ID>
class CopyID
{
public:
template <typename OUT, typename IN>
static void op(OUT& out,IN& in)
{
out[ID] = in[ID];
}
};
// **************************************************************
// ***************************MAIN*******************************
// **************************************************************
// Demo driver: statically unrolls Init over ids {1,2,6,3,10} to fill one
// map, then CopyID over ids {1,2,6} to copy a subset into another, and
// prints the values held by both.
int main()
{
    typedef std::map<int,int> IntMap;
    IntMap source;
    IntMap copied;
    ForFunctor<Init,1,2,6,3,10>::with(source);
    ForFunctor<CopyID,1,2,6>::with(copied,source);
    std::cout << "a: ";
    for (const auto& entry : source)
    {
        std::cout << entry.second << " ";
    }
    std::cout << "\nb: ";
    for (const auto& entry : copied)
    {
        std::cout << entry.second << " ";
    }
    std::cout << "\n";
}
</pre>Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-11590683583114475652015-04-15T22:21:00.000+09:002015-04-16T01:58:21.359+09:00Creating iomanip for a class - the easy wayNow creating custom io manipulators for your classes can be damn annoying.. The trick to building them is to install and manage a handler for the custom manipulator in the pword array of the ostream. I have hacked this little CustomManip class that allows you to quickly install a lambda function as a stream manipulator to select from or parameterize the various print methods that you have in play for your class. The only requirement is that each class have at least a " void printDefault(std::ostream& os) const" method that is the default print method.<br />
<br />
PS: Also there is one little bug with the code.. it does install the streamEvent responder several times.. but what's a blog post without a bug or 2..<br />
<br />
Anyway heres the code:<br />
<pre class="brush:cpp">//g++ -g --std=c++11 custom_class_manip.cpp
#include <iostream>
#include <ios>
#include <sstream>
#include <functional>
// CustomManip<TYPE>: lets you install a per-stream print function for
// TYPE (stored in the stream's pword() slot) so that iomanip-style
// manipulators can switch between a class's print methods.  TYPE only
// needs a "void printDefault(std::ostream&) const" fallback.
//
// BUG FIX vs. the original: install() used to call register_callback()
// every time the pword handle was missing, so each uninstall()/install()
// cycle stacked another streamEvent responder on the stream (visible as
// repeated erase_event lines in the sample output).  A separate iword
// flag, which is never cleared, now guarantees one registration per
// stream.
template <typename TYPE>
class CustomManip
{
public:
    typedef std::function<void(std::ostream&, const TYPE&)> ManipFunc;
    // installation helper for a manipulator with params
    class Installer
    {
        ManipFunc func_;
    public:
        Installer(ManipFunc func) :
            func_(func)
        {}
        // Streaming the Installer performs the installation.
        inline friend
        std::ostream& operator<<(std::ostream& os,
                                 const Installer& installer )
        {
            CustomManip<TYPE>::install(os,
                                       installer.func_);
            return os;
        }
    };
private:
    // heap-held state stored in the stream's pword() slot
    struct CustomManipHandle
    {
        ManipFunc func_;
        CustomManipHandle() { std::cout << "new handle at " << this << "\n"; }
        ~CustomManipHandle() { std::cout << "del handle at " << this << "\n"; }
    };
    static int handleIndex()
    {
        // the id for this CustomManip's handle in the ios_base pword map
        static int index = std::ios_base::xalloc();
        return index;
    }
    static int callbackIndex()
    {
        // iword slot flagging that streamEvent is already registered on
        // a given stream; unlike the pword handle it is never reset, so
        // re-installing after uninstall() cannot duplicate the callback
        static int index = std::ios_base::xalloc();
        return index;
    }
public:
    // for parameterized manipulator installations
    static Installer make_installer(ManipFunc func)
    {
        return Installer(func);
    }
    // for unparameterized manipulator installations
    static void install(std::ostream& os, ManipFunc func)
    {
        CustomManipHandle* handle =
            static_cast<CustomManipHandle*>(os.pword(handleIndex()));
        // check if its installed on this ostream
        if (handle == NULL)
        {
            // install it
            handle = new CustomManipHandle();
            os.pword(handleIndex()) = handle;
            // register the destruction callback exactly once per stream
            if (os.iword(callbackIndex()) == 0)
            {
                os.iword(callbackIndex()) = 1;
                os.register_callback (CustomManip<TYPE>::streamEvent,0);
            }
        }
        handle->func_ = func;
    }
    // remove any installed manipulator, reverting to printDefault()
    static void uninstall(std::ios_base& os)
    {
        CustomManipHandle* handle =
            static_cast<CustomManipHandle*>(os.pword(handleIndex()));
        //delete the installed CustomManip handle
        if (handle != NULL)
        {
            os.pword(handleIndex()) = NULL;
            delete handle;
        }
    }
    // ios_base event hook: tears the handle down when the stream dies
    static void streamEvent (std::ios::event ev,
                             std::ios_base& os,
                             int id)
    {
        switch (ev)
        {
        case os.copyfmt_event:
            std::cout << "copyfmt_event\n"; break;
            // NOTE(review): copyfmt copies the raw pword pointer too;
            // a deep copy of the handle is probably needed here -- confirm
        case os.imbue_event:
            std::cout << "imbue_event\n"; break;
        case os.erase_event:
            std::cout << "erase_event\n";
            uninstall(os);
            break;
        }
    }
    // print data via the installed manipulator, or printDefault() if none
    static void print(std::ostream& os, const TYPE& data)
    {
        CustomManipHandle* handle
            = static_cast<CustomManipHandle*>(os.pword(handleIndex()));
        if (handle != NULL)
        {
            handle->func_(os, data);
            return;
        }
        data.printDefault(os);
    }
};
/**********************************
****************TEST**************
**********************************/
// Test class: wraps an int and offers several alternative print styles
// for CustomManip to select between.
class A
{
int v_;
public:
A(int v) :
v_(v)
{}
// fallback printer required by CustomManip
void printDefault(std::ostream& os) const { os << " " << v_ << " default\n"; }
void printVer1 (std::ostream& os) const { os << " " << v_ << " abc\n"; }
void printVer2 (std::ostream& os) const { os << " " << v_ << " xyz\n"; }
// parameterized printer, driven via manip_param()
void printParam (std::ostream& os,
const std::string paramA,
const int paramB) const
{
os << " " << v_ << " param: " << paramA << " " << paramB << "\n";
}
};
// Route A's streaming through CustomManip so installed manipulators apply.
std::ostream& operator<<(std::ostream& os, const A& a)
{
CustomManip<A>::print(os, a);
return os;
}
// Second test class: shows that manipulators for different types are
// installed and selected independently on the same stream.
class B
{
public:
// fallback printer required by CustomManip
void printDefault(std::ostream& os) const { os << " default\n"; }
void printVer3 (std::ostream& os) const { os << " mno\n"; }
};
// Route B's streaming through CustomManip so installed manipulators apply.
std::ostream& operator<<(std::ostream& os, const B& b)
{
CustomManip<B>::print(os, b);
return os;
}
// Manipulator: drop any installed printers for A and B, reverting both
// to their printDefault().
std::ostream& manip_default (std::ostream& os)
{
CustomManip<A>::uninstall(os);
CustomManip<B>::uninstall(os);
return os;
}
// Manipulator: make subsequent A's print via printVer1().
std::ostream& manip_ver1 (std::ostream& os)
{
CustomManip<A>::install(os,
[](std::ostream& oos, const A& a)
{
a.printVer1(oos);
});
return os;
}
// Manipulator: make subsequent A's print via printVer2().
std::ostream& manip_ver2(std::ostream& os)
{
CustomManip<A>::install(os,
[](std::ostream& oos, const A& a)
{
a.printVer2(oos);
});
return os;
}
// Manipulator: make subsequent B's print via printVer3().
std::ostream& manip_ver3(std::ostream& os)
{
CustomManip<B>::install(os,
[](std::ostream& oos, const B& b)
{
b.printVer3(oos);
});
return os;
}
// Parameterized manipulator: captures its arguments in a lambda and
// returns an Installer, so `os << manip_param("x", 1)` installs it.
CustomManip<A>::Installer manip_param(const std::string str,
const int other)
{
return CustomManip<A>::make_installer(
[str, other](std::ostream& oos, const A& a)
{
a.printParam(oos, str, other);
});
}
// Exercise the manipulators: defaults first, then switching printers
// mid-stream on std::cout, then the same sequence on an independent
// stringstream (each stream carries its own installed manipulators).
int main()
{
A a(1);
B b;
std::cout << a
<< b
<< "\n";
std::cout << manip_ver1 << a
<< manip_ver2 << a
<< manip_ver3 << b
<< manip_default << a
<< manip_ver1 << a
<< manip_param("testing", 4) << a
<< b
<< "\n";
std::stringstream oss;
oss << manip_ver1 << a
<< manip_ver2 << a
<< manip_ver3 << b
<< manip_default << a
<< manip_ver1 << a
<< manip_param("testing", 4) << a
<< b
<< "\n";
std::cout << oss.str();
}
</pre><br />
The resulting output looks like<br />
<pre> 1 default
default
new handle at 0x307de0
1 abc
1 xyz
new handle at 0x307e50
mno
del handle at 0x307de0
del handle at 0x307e50
1 default
new handle at 0x307de0
1 abc
1 param: testing 4
default
new handle at 0x307ef0
new handle at 0x307e90
del handle at 0x307ef0
del handle at 0x307e90
new handle at 0x307e90
1 abc
1 xyz
mno
1 default
1 abc
1 param: testing 4
default
erase_event
del handle at 0x307e90
erase_event
erase_event
</pre><br />
Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0tag:blogger.com,1999:blog-612171808890707553.post-2333462793572518752014-11-18T23:06:00.001+09:002014-11-18T23:14:17.049+09:00How to create a parsing/printing system with 1 line of code per fieldRecently had a friend claim that you can't parse and print a blob of data without cutting and pasting stuff everywhere or using the new c++11 compilers and techniques. <br />
<br />
I have in the past (2012) shown techniques to do this kind of parsing and generation in <a href="2012/10/c11-tuples-and-schema-generation.html">c11-tuples-and-schema-generation.html</a>. So with a few short hours of hacking, here is the proof of concept.. The solution is meta programming black magic but hey you can define the parser and printer (and whatever else you want to add) for "SomeMessage" in basically 1 line of code per field.<br />
<br />
<pre class="brush:cpp">// compile with:
// g++ -I"c:\tools\boost_1_49_0" -L"c:\tools\boost_1_49_0\stage\lib" -static self_print_struct.cpp -o self_print_struct.exe
#include <cstring>
#include <iostream>
#include <boost/mpl/for_each.hpp>
#include <boost/mpl/list.hpp>
#include <boost/mpl/range_c.hpp>
#include <boost/mpl/string.hpp>
// Schema element: a value of type T labelled with the compile-time name
// N (a boost::mpl::string).
template <typename T,
          typename N>
struct Field
{
    typedef T Type;
    typedef N Name;
    // Print "name : value" for the field starting at ptr and return the
    // cursor advanced past it.
    static unsigned char* print(std::ostream& os, unsigned char* ptr)
    {
        // FIX: memcpy instead of casting the raw buffer pointer -- the
        // field offset is not guaranteed to be aligned for T, and the
        // old *(T*)ptr read was undefined behaviour (misalignment and
        // strict aliasing)
        Type value;
        std::memcpy(&value, ptr, sizeof(Type));
        os << boost::mpl::c_str<N>::value
           << " : " << value;
        return ptr + sizeof(Type);
    }
};
// CRTP mixin: gives Base a static print() that walks Base::Type (an mpl
// list of Field<>s) and prints each field from a raw byte buffer.
template <typename Base>
struct PrintMixin
{
    // for_each visitor: prints one field to os_ and advances the cursor
    // by that field's size.
    struct DoPrint
    {
        std::ostream& os_;
        unsigned char* cursor_;
        DoPrint(std::ostream& os, unsigned char* cursor) :
            os_(os),
            cursor_(cursor)
        {}
        template< typename U > void operator()(U)
        {
            os_ << " + ";
            U::print(os_, cursor_);
            os_ << '\n';
            cursor_ += sizeof(typename U::Type);
        }
    };
    // Print every field of the message starting at ptr to os.
    // FIX: the old visitor hard-coded std::cout, silently ignoring the
    // os argument; it is now threaded through to the fields.
    static void print(std::ostream& os, unsigned char* ptr)
    {
        boost::mpl::for_each< typename Base::Type >( DoPrint(os, ptr) );
    }
};
// Example schema: an int named "field 1" then a char named "field 2".
// Names use multi-character literals -- each boost::mpl::string segment
// packs up to 4 characters.
struct SomeMessage : PrintMixin<SomeMessage>
{
typedef boost::mpl::list<Field<int, boost::mpl::string<'fiel','d 1'> >,
Field<char, boost::mpl::string<'fiel','d 2'> > > Type;
};
// Example schema with three fields: int "this", char "that",
// float "the other".
struct AnotherMessage : PrintMixin<AnotherMessage>
{
typedef boost::mpl::list<Field<int, boost::mpl::string<'this'> >,
Field<char, boost::mpl::string<'that'> >,
Field<float, boost::mpl::string<'the ','othe','r'> >
> Type;
};
// Demo: feed two raw byte blobs through their schema-declared layouts
// and print every field.
int main()
{
    // SomeMessage: 4 int bytes then one char
    unsigned char blobA[] =
    {
        0x01,0x02,0x03,0x04,
        'a'
    };
    std::cout << "message 1\n";
    SomeMessage::print(std::cout, blobA);
    // AnotherMessage: int, char, then 4 float bytes
    unsigned char blobB[] =
    {
        0x15, 0xCD, 0x5B, 0x07,
        'b',
        0x19, 0x04, 0x9e, 0x3f
    };
    std::cout << "message 2\n";
    AnotherMessage::print(std::cout, blobB);
}
</pre><br />
And the output looks like:<br />
<pre>$ self_print_struct.exe
message 1
+ field 1 : 67305985
+ field 2 : a
message 2
+ this : 123456789
+ that : b
+ the other : 1.2345
</pre><br />
Ashley Smarthttp://www.blogger.com/profile/06991073477908020603noreply@blogger.com0