Skip to content

Instantly share code, notes, and snippets.

@Cosxin
Last active January 12, 2018 22:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Cosxin/30311bfad6a25e03ae56 to your computer and use it in GitHub Desktop.
Save Cosxin/30311bfad6a25e03ae56 to your computer and use it in GitHub Desktop.
DynaQ
A path finder visualization with Q-Learner. Built with D3.
// https://d3js.org/d3-array/ Version 1.2.1. Copyright 2017 Mike Bostock.
!function(n,r){"object"==typeof exports&&"undefined"!=typeof module?r(exports):"function"==typeof define&&define.amd?define(["exports"],r):r(n.d3=n.d3||{})}(this,function(n){"use strict";function r(n){return function(r,t){return f(n(r),t)}}function t(n,r){return[n,r]}function e(n,r,t){var e=(r-n)/Math.max(0,t),o=Math.floor(Math.log(e)/Math.LN10),u=e/Math.pow(10,o);return o>=0?(u>=b?10:u>=q?5:u>=L?2:1)*Math.pow(10,o):-Math.pow(10,-o)/(u>=b?10:u>=q?5:u>=L?2:1)}function o(n,r,t){var e=Math.abs(r-n)/Math.max(0,t),o=Math.pow(10,Math.floor(Math.log(e)/Math.LN10)),u=e/o;return u>=b?o*=10:u>=q?o*=5:u>=L&&(o*=2),r<n?-o:o}function u(n){return n.length}var f=function(n,r){return n<r?-1:n>r?1:n>=r?0:NaN},l=function(n){return 1===n.length&&(n=r(n)),{left:function(r,t,e,o){for(null==e&&(e=0),null==o&&(o=r.length);e<o;){var u=e+o>>>1;n(r[u],t)<0?e=u+1:o=u}return e},right:function(r,t,e,o){for(null==e&&(e=0),null==o&&(o=r.length);e<o;){var u=e+o>>>1;n(r[u],t)>0?o=u:e=u+1}return e}}},i=l(f),a=i.right,h=i.left,c=function(n,r){null==r&&(r=t);for(var e=0,o=n.length-1,u=n[0],f=new Array(o<0?0:o);e<o;)f[e]=r(u,u=n[++e]);return f},s=function(n,r,e){var o,u,f,l,i=n.length,a=r.length,h=new Array(i*a);for(null==e&&(e=t),o=f=0;o<i;++o)for(l=n[o],u=0;u<a;++u,++f)h[f]=e(l,r[u]);return h},g=function(n,r){return r<n?-1:r>n?1:r>=n?0:NaN},M=function(n){return null===n?NaN:+n},v=function(n,r){var t,e,o=n.length,u=0,f=-1,l=0,i=0;if(null==r)for(;++f<o;)isNaN(t=M(n[f]))||(e=t-l,l+=e/++u,i+=e*(t-l));else for(;++f<o;)isNaN(t=M(r(n[f],f,n)))||(e=t-l,l+=e/++u,i+=e*(t-l));if(u>1)return i/(u-1)},p=function(n,r){var t=v(n,r);return t?Math.sqrt(t):t},d=function(n,r){var t,e,o,u=n.length,f=-1;if(null==r){for(;++f<u;)if(null!=(t=n[f])&&t>=t)for(e=o=t;++f<u;)null!=(t=n[f])&&(e>t&&(e=t),o<t&&(o=t))}else for(;++f<u;)if(null!=(t=r(n[f],f,n))&&t>=t)for(e=o=t;++f<u;)null!=(t=r(n[f],f,n))&&(e>t&&(e=t),o<t&&(o=t));return[e,o]},y=Array.prototype,N=y.slice,m=y.map,w=function(n){return function(){return n}},A=function(n){return n},x=function(n,r,t){n=+n,r=+r,t=(o=arguments.length)<2?(r=n,n=0,1):o<3?1:+t;for(var e=-1,o=0|Math.max(0,Math.ceil((r-n)/t)),u=new Array(o);++e<o;)u[e]=n+e*t;return u},b=Math.sqrt(50),q=Math.sqrt(10),L=Math.sqrt(2),k=function(n,r,t){var o,u,f,l,i=-1;if(r=+r,n=+n,t=+t,n===r&&t>0)return[n];if((o=r<n)&&(u=n,n=r,r=u),0===(l=e(n,r,t))||!isFinite(l))return[];if(l>0)for(n=Math.ceil(n/l),r=Math.floor(r/l),f=new Array(u=Math.ceil(r-n+1));++i<u;)f[i]=(n+i)*l;else for(n=Math.floor(n*l),r=Math.ceil(r*l),f=new Array(u=Math.ceil(n-r+1));++i<u;)f[i]=(n-i)/l;return o&&f.reverse(),f},S=function(n){return Math.ceil(Math.log(n.length)/Math.LN2)+1},j=function(){function n(n){var u,f,l=n.length,i=new Array(l);for(u=0;u<l;++u)i[u]=r(n[u],u,n);var h=t(i),c=h[0],s=h[1],g=e(i,c,s);Array.isArray(g)||(g=o(c,s,g),g=x(Math.ceil(c/g)*g,Math.floor(s/g)*g,g));for(var M=g.length;g[0]<=c;)g.shift(),--M;for(;g[M-1]>s;)g.pop(),--M;var v,p=new Array(M+1);for(u=0;u<=M;++u)v=p[u]=[],v.x0=u>0?g[u-1]:c,v.x1=u<M?g[u]:s;for(u=0;u<l;++u)f=i[u],c<=f&&f<=s&&p[a(g,f,0,M)].push(n[u]);return p}var r=A,t=d,e=S;return n.value=function(t){return arguments.length?(r="function"==typeof t?t:w(t),n):r},n.domain=function(r){return arguments.length?(t="function"==typeof r?r:w([r[0],r[1]]),n):t},n.thresholds=function(r){return arguments.length?(e="function"==typeof r?r:w(Array.isArray(r)?N.call(r):r),n):e},n},F=function(n,r,t){if(null==t&&(t=M),e=n.length){if((r=+r)<=0||e<2)return+t(n[0],0,n);if(r>=1)return+t(n[e-1],e-1,n);var e,o=(e-1)*r,u=Math.floor(o),f=+t(n[u],u,n);return f+(+t(n[u+1],u+1,n)-f)*(o-u)}},_=function(n,r,t){return n=m.call(n,M).sort(f),Math.ceil((t-r)/(2*(F(n,.75)-F(n,.25))*Math.pow(n.length,-1/3)))},z=function(n,r,t){return Math.ceil((t-r)/(3.5*p(n)*Math.pow(n.length,-1/3)))},D=function(n,r){var t,e,o=n.length,u=-1;if(null==r){for(;++u<o;)if(null!=(t=n[u])&&t>=t)for(e=t;++u<o;)null!=(t=n[u])&&t>e&&(e=t)}else for(;++u<o;)if(null!=(t=r(n[u],u,n))&&t>=t)for(e=t;++u<o;)null!=(t=r(n[u],u,n))&&t>e&&(e=t);return e},I=function(n,r){var t,e=n.length,o=e,u=-1,f=0;if(null==r)for(;++u<e;)isNaN(t=M(n[u]))?--o:f+=t;else for(;++u<e;)isNaN(t=M(r(n[u],u,n)))?--o:f+=t;if(o)return f/o},O=function(n,r){var t,e=n.length,o=-1,u=[];if(null==r)for(;++o<e;)isNaN(t=M(n[o]))||u.push(t);else for(;++o<e;)isNaN(t=M(r(n[o],o,n)))||u.push(t);return F(u.sort(f),.5)},P=function(n){for(var r,t,e,o=n.length,u=-1,f=0;++u<o;)f+=n[u].length;for(t=new Array(f);--o>=0;)for(e=n[o],r=e.length;--r>=0;)t[--f]=e[r];return t},R=function(n,r){var t,e,o=n.length,u=-1;if(null==r){for(;++u<o;)if(null!=(t=n[u])&&t>=t)for(e=t;++u<o;)null!=(t=n[u])&&e>t&&(e=t)}else for(;++u<o;)if(null!=(t=r(n[u],u,n))&&t>=t)for(e=t;++u<o;)null!=(t=r(n[u],u,n))&&e>t&&(e=t);return e},B=function(n,r){for(var t=r.length,e=new Array(t);t--;)e[t]=n[r[t]];return e},C=function(n,r){if(t=n.length){var t,e,o=0,u=0,l=n[u];for(null==r&&(r=f);++o<t;)(r(e=n[o],l)<0||0!==r(l,l))&&(l=e,u=o);return 0===r(l,l)?u:void 0}},E=function(n,r,t){for(var e,o,u=(null==t?n.length:t)-(r=null==r?0:+r);u;)o=Math.random()*u--|0,e=n[u+r],n[u+r]=n[o+r],n[o+r]=e;return n},G=function(n,r){var t,e=n.length,o=-1,u=0;if(null==r)for(;++o<e;)(t=+n[o])&&(u+=t);else for(;++o<e;)(t=+r(n[o],o,n))&&(u+=t);return u},H=function(n){if(!(o=n.length))return[];for(var r=-1,t=R(n,u),e=new Array(t);++r<t;)for(var o,f=-1,l=e[r]=new Array(o);++f<o;)l[f]=n[f][r];return e},J=function(){return H(arguments)};n.bisect=a,n.bisectRight=a,n.bisectLeft=h,n.ascending=f,n.bisector=l,n.cross=s,n.descending=g,n.deviation=p,n.extent=d,n.histogram=j,n.thresholdFreedmanDiaconis=_,n.thresholdScott=z,n.thresholdSturges=S,n.max=D,n.mean=I,n.median=O,n.merge=P,n.min=R,n.pairs=c,n.permute=B,n.quantile=F,n.range=x,n.scan=C,n.shuffle=E,n.sum=G,n.ticks=k,n.tickIncrement=e,n.tickStep=o,n.transpose=H,n.variance=v,n.zip=J,Object.defineProperty(n,"__esModule",{value:!0})});
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<title>D3.js Charts</title>
<script src="https://d3js.org/d3.v4.js"></script>
<script src="https://d3js.org/d3-scale-chromatic.v1.min.js"></script>
</head>
<style>
#stage{
position:fixed;
top:50%;
left:50%;
width:500px;
height:500px;
margin: -350px 0 0 -250px;
}
line, rect {
stroke: #000;
}
rect{
opacity: .8;
}
.background{
fill:#AAA;
}
</style>
<body>
<div id="stage">
<h2>Path finding with Q-Learning</h2>
<h5>It will take about 2 mins, require WebWorker browser support</h5>
<h5>The more frequent a cell was visited in current iteration, the darker it will be</h5>
<div>
</div>
<div class="main" style="width:100%;display:flex">
</div>
</div>
<script type="text/javascript">
var genBoard = function(n, p){
B = d3.range(n).map(d=>d3.range(n).map(function(){ return Math.random() < p ? 1 : 0}));
B[0][0] = 2, B[n-1][n-1] = 3, B[0][1] = 0, B[1][0] = 0, B[n-1][n-2]=0, B[n-2][n-1]=0;
return B;
}
var n = 80, p = 0.25,
dat = genBoard(n, p);
var width = 400,
height = 400; //sv18g width height
var margin = {top:10,right:0,bottom:0,left:0};
var x = d3.scaleBand().domain(d3.range(n)).range([0,width]),
y = d3.scaleBand().domain(d3.range(n)).range([0,height]);
var fills = ['#FFFFFF','#404040','#2b83ba','#d7191c']; //default fills
var svg = d3.select('.main').insert('svg',':first-child')
.attr('width', width + margin.left + margin.right )
.attr('height', height + margin.top + margin.bottom )
.attr("class","martixChart")
.append('g')
.attr('transform', 'translate(' + margin.left + ',' + margin.top + ')');
svg.append("rect")
.attr("class","background")
.attr("width",width)
.attr("height",height)
var rows = svg.selectAll(".row")
.data(dat)
.enter()
.append("g")
.attr("class","row")
.attr('transform', function(d,i){return 'translate(' + 0 + ',' + y(i) + ')';})
.each(row);
function row(d){
var cells = d3.select(this).selectAll(".cell")
.data(d)
.enter()
.append("rect")
.attr("class","cell")
.attr("x",function(d,i){return x(i);})
.attr("height",function(d){return x.bandwidth();})
.attr("width",function(d){return x.bandwidth();})
.style('fill',function(d,i,j){return fills[+d];});
}
function updateCell(Freq){
d3.selectAll('.cell').style('fill',function(d,i){return dat[Math.floor(i/n)][i%n] == 1 ? "#000000" : d3.interpolateReds(Freq[i]);});
}
function updateBoard(F){
var fmax=d3.max(F),fmin=d3.min(F);
updateCell(F.map(function(d){return d / (fmax-fmin);}));
}
if (window.Worker)
{
var worker = new Worker("qlearn.js");
worker.postMessage(dat);
worker.onmessage = function(e){
updateBoard(e.data);
}
}
</script>
</body>
</html>
importScripts("d3-array.min.js");
/*Worker Thread where computation happens*/
var dat;
var n = 80;
onmessage = function(e){
dat = e.data;
start();
}
var startpos = [0,0], endpos = [n-1,n-1];
//QLearner
var QL = {
createNew : function(num_states,num_actions,alpha, gamma ,rar ,radr ,dyna, alphar){
var ql = {};
var num_actions = num_actions || 4;
var num_states = num_states || 100;
var Q = d3.range(num_states).map(function(){return d3.range(num_actions).map(function(){return Math.random();});});
var T = d3.range(num_states).map(function(){
return d3.range(num_actions).map(function(){
return d3.range(num_states).map(function(){
return 1.0 / 100000;
});
});
});
var Tc = T.slice();
var R = Q.slice().map(function(){return 0.0;});
var alpha = alpha || 0.2;
var alphar = alphar || alpha;
var rar = rar || 0.98;
var radr = radr || 0.995;
var gamma = gamma || 0.9;
var dyna = dyna || 0;
var s = -1; //cur state
var a = -1; //cur action
var isRunning = false;
ql.firstAction = function(state){
s = state;
var action = QL.argmax(Q[s]);
rar = rar * radr;
a = action;
return action;
};
ql.getAction = function(s_prime,r){
Q[s][a] = (1 - alpha) * Q[s][a] + alpha * (r + gamma * d3.max(Q[s_prime]));
var action = QL.argmax(Q[s_prime]);
//Tc[s][s_prime][a] += 1.0;
if(Math.random() < rar)
action = Math.floor(Math.random() * 4);
rar *= radr;
a = action;
s = s_prime;
return action;
};
ql.getQ = function(){
return Q;
}
return ql;
},
argmax : function(arr){
var max = -Infinity;
var index = 0;
var a = arr.map(function(d){return Number(d);});
for(var i = -1; ++i < a.length; ){
if(a[i] > max){
max = a[i];
index = i;
}
}
return index;
}
};
//End of QLearner
//1 go up, 2 right, 3 down 4 left
function move(oldPos,action){
var newPos = [0,0];
newPos[0] = (action == 3) ? oldPos[0] - 1 : (action == 1) ? oldPos[0] + 1: oldPos[0];
newPos[1] = (action == 2) ? oldPos[1] + 1 : (action == 0) ? oldPos[1] - 1: oldPos[1];
if(dat[newPos[0]] == undefined || dat[newPos[0]][newPos[1]] == undefined || dat[newPos[0]][newPos[1]] == 1)
return oldPos.slice();
return newPos.slice();
}
function samePos(Pos1,Pos2){
return Pos1[0] == Pos2[0] && Pos2[1] == Pos1[1];
}
function toState(pos){
return pos[0] * n + pos[1];
}
var q = QL.createNew(n*n, 4);
function start(iter = 50000){
console.log("start");
var iterations = iter || 50000;
//A Buffer to store last 10 Freq
//Only Add Freq in last 10 iterations
var FCircular = d3.range(n*n).map(d=>[0,0,0,0,0,0,0,0,0,0]);
for(var i = -1; ++i < iterations;){
var currpos = startpos.slice();
var steps = 0;
var s = toState(startpos);
var action = q.firstAction(s);
var F = d3.range(n*n).map(d=>0);
while(!samePos(currpos,endpos)){
currpos = move(currpos,action);
if(samePos(currpos,endpos))
reward = 1;
else
reward = -1;
s = toState(currpos);
F[toState(currpos)]++; //update F
action = q.getAction(s,reward);
steps++;
if(steps > 2500000) { console.log('no path'); return;}
}
FCircular.map(function(d,i){
FCircular[i].push(F[i]);
FCircular[i].shift();
});
F = FCircular.map(d=>d3.sum(d));
//render every 100 iterations, For performance
if(i % 100 == 0) postMessage(F);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment