Last active
January 12, 2018 22:40
-
-
Save Cosxin/30311bfad6a25e03ae56 to your computer and use it in GitHub Desktop.
DynaQ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
A path finder visualization with Q-Learner. Built with D3. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://d3js.org/d3-array/ Version 1.2.1. Copyright 2017 Mike Bostock. | |
!function(n,r){"object"==typeof exports&&"undefined"!=typeof module?r(exports):"function"==typeof define&&define.amd?define(["exports"],r):r(n.d3=n.d3||{})}(this,function(n){"use strict";function r(n){return function(r,t){return f(n(r),t)}}function t(n,r){return[n,r]}function e(n,r,t){var e=(r-n)/Math.max(0,t),o=Math.floor(Math.log(e)/Math.LN10),u=e/Math.pow(10,o);return o>=0?(u>=b?10:u>=q?5:u>=L?2:1)*Math.pow(10,o):-Math.pow(10,-o)/(u>=b?10:u>=q?5:u>=L?2:1)}function o(n,r,t){var e=Math.abs(r-n)/Math.max(0,t),o=Math.pow(10,Math.floor(Math.log(e)/Math.LN10)),u=e/o;return u>=b?o*=10:u>=q?o*=5:u>=L&&(o*=2),r<n?-o:o}function u(n){return n.length}var f=function(n,r){return n<r?-1:n>r?1:n>=r?0:NaN},l=function(n){return 1===n.length&&(n=r(n)),{left:function(r,t,e,o){for(null==e&&(e=0),null==o&&(o=r.length);e<o;){var u=e+o>>>1;n(r[u],t)<0?e=u+1:o=u}return e},right:function(r,t,e,o){for(null==e&&(e=0),null==o&&(o=r.length);e<o;){var u=e+o>>>1;n(r[u],t)>0?o=u:e=u+1}return e}}},i=l(f),a=i.right,h=i.left,c=function(n,r){null==r&&(r=t);for(var e=0,o=n.length-1,u=n[0],f=new Array(o<0?0:o);e<o;)f[e]=r(u,u=n[++e]);return f},s=function(n,r,e){var o,u,f,l,i=n.length,a=r.length,h=new Array(i*a);for(null==e&&(e=t),o=f=0;o<i;++o)for(l=n[o],u=0;u<a;++u,++f)h[f]=e(l,r[u]);return h},g=function(n,r){return r<n?-1:r>n?1:r>=n?0:NaN},M=function(n){return null===n?NaN:+n},v=function(n,r){var t,e,o=n.length,u=0,f=-1,l=0,i=0;if(null==r)for(;++f<o;)isNaN(t=M(n[f]))||(e=t-l,l+=e/++u,i+=e*(t-l));else for(;++f<o;)isNaN(t=M(r(n[f],f,n)))||(e=t-l,l+=e/++u,i+=e*(t-l));if(u>1)return i/(u-1)},p=function(n,r){var t=v(n,r);return t?Math.sqrt(t):t},d=function(n,r){var t,e,o,u=n.length,f=-1;if(null==r){for(;++f<u;)if(null!=(t=n[f])&&t>=t)for(e=o=t;++f<u;)null!=(t=n[f])&&(e>t&&(e=t),o<t&&(o=t))}else for(;++f<u;)if(null!=(t=r(n[f],f,n))&&t>=t)for(e=o=t;++f<u;)null!=(t=r(n[f],f,n))&&(e>t&&(e=t),o<t&&(o=t));return[e,o]},y=Array.prototype,N=y.slice,m=y.map,w=function(n){return function(){return n}},A=function(n){return n},x=function(n,r,t){n=+n,r=+r,t=(o=arguments.length)<2?(r=n,n=0,1):o<3?1:+t;for(var e=-1,o=0|Math.max(0,Math.ceil((r-n)/t)),u=new Array(o);++e<o;)u[e]=n+e*t;return u},b=Math.sqrt(50),q=Math.sqrt(10),L=Math.sqrt(2),k=function(n,r,t){var o,u,f,l,i=-1;if(r=+r,n=+n,t=+t,n===r&&t>0)return[n];if((o=r<n)&&(u=n,n=r,r=u),0===(l=e(n,r,t))||!isFinite(l))return[];if(l>0)for(n=Math.ceil(n/l),r=Math.floor(r/l),f=new Array(u=Math.ceil(r-n+1));++i<u;)f[i]=(n+i)*l;else for(n=Math.floor(n*l),r=Math.ceil(r*l),f=new Array(u=Math.ceil(n-r+1));++i<u;)f[i]=(n-i)/l;return o&&f.reverse(),f},S=function(n){return Math.ceil(Math.log(n.length)/Math.LN2)+1},j=function(){function n(n){var u,f,l=n.length,i=new Array(l);for(u=0;u<l;++u)i[u]=r(n[u],u,n);var h=t(i),c=h[0],s=h[1],g=e(i,c,s);Array.isArray(g)||(g=o(c,s,g),g=x(Math.ceil(c/g)*g,Math.floor(s/g)*g,g));for(var M=g.length;g[0]<=c;)g.shift(),--M;for(;g[M-1]>s;)g.pop(),--M;var v,p=new Array(M+1);for(u=0;u<=M;++u)v=p[u]=[],v.x0=u>0?g[u-1]:c,v.x1=u<M?g[u]:s;for(u=0;u<l;++u)f=i[u],c<=f&&f<=s&&p[a(g,f,0,M)].push(n[u]);return p}var r=A,t=d,e=S;return n.value=function(t){return arguments.length?(r="function"==typeof t?t:w(t),n):r},n.domain=function(r){return arguments.length?(t="function"==typeof r?r:w([r[0],r[1]]),n):t},n.thresholds=function(r){return arguments.length?(e="function"==typeof r?r:w(Array.isArray(r)?N.call(r):r),n):e},n},F=function(n,r,t){if(null==t&&(t=M),e=n.length){if((r=+r)<=0||e<2)return+t(n[0],0,n);if(r>=1)return+t(n[e-1],e-1,n);var e,o=(e-1)*r,u=Math.floor(o),f=+t(n[u],u,n);return f+(+t(n[u+1],u+1,n)-f)*(o-u)}},_=function(n,r,t){return n=m.call(n,M).sort(f),Math.ceil((t-r)/(2*(F(n,.75)-F(n,.25))*Math.pow(n.length,-1/3)))},z=function(n,r,t){return Math.ceil((t-r)/(3.5*p(n)*Math.pow(n.length,-1/3)))},D=function(n,r){var t,e,o=n.length,u=-1;if(null==r){for(;++u<o;)if(null!=(t=n[u])&&t>=t)for(e=t;++u<o;)null!=(t=n[u])&&t>e&&(e=t)}else for(;++u<o;)if(null!=(t=r(n[u],u,n))&&t>=t)for(e=t;++u<o;)null!=(t=r(n[u],u,n))&&t>e&&(e=t);return e},I=function(n,r){var t,e=n.length,o=e,u=-1,f=0;if(null==r)for(;++u<e;)isNaN(t=M(n[u]))?--o:f+=t;else for(;++u<e;)isNaN(t=M(r(n[u],u,n)))?--o:f+=t;if(o)return f/o},O=function(n,r){var t,e=n.length,o=-1,u=[];if(null==r)for(;++o<e;)isNaN(t=M(n[o]))||u.push(t);else for(;++o<e;)isNaN(t=M(r(n[o],o,n)))||u.push(t);return F(u.sort(f),.5)},P=function(n){for(var r,t,e,o=n.length,u=-1,f=0;++u<o;)f+=n[u].length;for(t=new Array(f);--o>=0;)for(e=n[o],r=e.length;--r>=0;)t[--f]=e[r];return t},R=function(n,r){var t,e,o=n.length,u=-1;if(null==r){for(;++u<o;)if(null!=(t=n[u])&&t>=t)for(e=t;++u<o;)null!=(t=n[u])&&e>t&&(e=t)}else for(;++u<o;)if(null!=(t=r(n[u],u,n))&&t>=t)for(e=t;++u<o;)null!=(t=r(n[u],u,n))&&e>t&&(e=t);return e},B=function(n,r){for(var t=r.length,e=new Array(t);t--;)e[t]=n[r[t]];return e},C=function(n,r){if(t=n.length){var t,e,o=0,u=0,l=n[u];for(null==r&&(r=f);++o<t;)(r(e=n[o],l)<0||0!==r(l,l))&&(l=e,u=o);return 0===r(l,l)?u:void 0}},E=function(n,r,t){for(var e,o,u=(null==t?n.length:t)-(r=null==r?0:+r);u;)o=Math.random()*u--|0,e=n[u+r],n[u+r]=n[o+r],n[o+r]=e;return n},G=function(n,r){var t,e=n.length,o=-1,u=0;if(null==r)for(;++o<e;)(t=+n[o])&&(u+=t);else for(;++o<e;)(t=+r(n[o],o,n))&&(u+=t);return u},H=function(n){if(!(o=n.length))return[];for(var r=-1,t=R(n,u),e=new Array(t);++r<t;)for(var o,f=-1,l=e[r]=new Array(o);++f<o;)l[f]=n[f][r];return e},J=function(){return H(arguments)};n.bisect=a,n.bisectRight=a,n.bisectLeft=h,n.ascending=f,n.bisector=l,n.cross=s,n.descending=g,n.deviation=p,n.extent=d,n.histogram=j,n.thresholdFreedmanDiaconis=_,n.thresholdScott=z,n.thresholdSturges=S,n.max=D,n.mean=I,n.median=O,n.merge=P,n.min=R,n.pairs=c,n.permute=B,n.quantile=F,n.range=x,n.scan=C,n.shuffle=E,n.sum=G,n.ticks=k,n.tickIncrement=e,n.tickStep=o,n.transpose=H,n.variance=v,n.zip=J,Object.defineProperty(n,"__esModule",{value:!0})}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html> | |
<head> | |
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"> | |
<title>D3.js Charts</title> | |
<script src="https://d3js.org/d3.v4.js"></script> | |
<script src="https://d3js.org/d3-scale-chromatic.v1.min.js"></script> | |
</head> | |
<style> | |
#stage{ | |
position:fixed; | |
top:50%; | |
left:50%; | |
width:500px; | |
height:500px; | |
margin: -350px 0 0 -250px; | |
} | |
line, rect { | |
stroke: #000; | |
} | |
rect{ | |
opacity: .8; | |
} | |
.background{ | |
fill:#AAA; | |
} | |
</style> | |
<body> | |
<div id="stage"> | |
<h2>Path finding with Q-Learning</h2> | |
<h5>It will take about 2 mins, require WebWorker browser support</h5> | |
<h5>The more frequent a cell was visited in current iteration, the darker it will be</h5> | |
<div> | |
</div> | |
<div class="main" style="width:100%;display:flex"> | |
</div> | |
</div> | |
<script type="text/javascript"> | |
var genBoard = function(n, p){ | |
B = d3.range(n).map(d=>d3.range(n).map(function(){ return Math.random() < p ? 1 : 0})); | |
B[0][0] = 2, B[n-1][n-1] = 3, B[0][1] = 0, B[1][0] = 0, B[n-1][n-2]=0, B[n-2][n-1]=0; | |
return B; | |
} | |
var n = 80, p = 0.25, | |
dat = genBoard(n, p); | |
var width = 400, | |
height = 400; //sv18g width height | |
var margin = {top:10,right:0,bottom:0,left:0}; | |
var x = d3.scaleBand().domain(d3.range(n)).range([0,width]), | |
y = d3.scaleBand().domain(d3.range(n)).range([0,height]); | |
var fills = ['#FFFFFF','#404040','#2b83ba','#d7191c']; //default fills | |
var svg = d3.select('.main').insert('svg',':first-child') | |
.attr('width', width + margin.left + margin.right ) | |
.attr('height', height + margin.top + margin.bottom ) | |
.attr("class","martixChart") | |
.append('g') | |
.attr('transform', 'translate(' + margin.left + ',' + margin.top + ')'); | |
svg.append("rect") | |
.attr("class","background") | |
.attr("width",width) | |
.attr("height",height) | |
var rows = svg.selectAll(".row") | |
.data(dat) | |
.enter() | |
.append("g") | |
.attr("class","row") | |
.attr('transform', function(d,i){return 'translate(' + 0 + ',' + y(i) + ')';}) | |
.each(row); | |
function row(d){ | |
var cells = d3.select(this).selectAll(".cell") | |
.data(d) | |
.enter() | |
.append("rect") | |
.attr("class","cell") | |
.attr("x",function(d,i){return x(i);}) | |
.attr("height",function(d){return x.bandwidth();}) | |
.attr("width",function(d){return x.bandwidth();}) | |
.style('fill',function(d,i,j){return fills[+d];}); | |
} | |
function updateCell(Freq){ | |
d3.selectAll('.cell').style('fill',function(d,i){return dat[Math.floor(i/n)][i%n] == 1 ? "#000000" : d3.interpolateReds(Freq[i]);}); | |
} | |
function updateBoard(F){ | |
var fmax=d3.max(F),fmin=d3.min(F); | |
updateCell(F.map(function(d){return d / (fmax-fmin);})); | |
} | |
if (window.Worker) | |
{ | |
var worker = new Worker("qlearn.js"); | |
worker.postMessage(dat); | |
worker.onmessage = function(e){ | |
updateBoard(e.data); | |
} | |
} | |
</script> | |
</body> | |
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
importScripts("d3-array.min.js"); | |
/*Worker Thread where computation happens*/ | |
var dat; | |
var n = 80; | |
onmessage = function(e){ | |
dat = e.data; | |
start(); | |
} | |
var startpos = [0,0], endpos = [n-1,n-1]; | |
//QLearner | |
var QL = { | |
createNew : function(num_states,num_actions,alpha, gamma ,rar ,radr ,dyna, alphar){ | |
var ql = {}; | |
var num_actions = num_actions || 4; | |
var num_states = num_states || 100; | |
var Q = d3.range(num_states).map(function(){return d3.range(num_actions).map(function(){return Math.random();});}); | |
var T = d3.range(num_states).map(function(){ | |
return d3.range(num_actions).map(function(){ | |
return d3.range(num_states).map(function(){ | |
return 1.0 / 100000; | |
}); | |
}); | |
}); | |
var Tc = T.slice(); | |
var R = Q.slice().map(function(){return 0.0;}); | |
var alpha = alpha || 0.2; | |
var alphar = alphar || alpha; | |
var rar = rar || 0.98; | |
var radr = radr || 0.995; | |
var gamma = gamma || 0.9; | |
var dyna = dyna || 0; | |
var s = -1; //cur state | |
var a = -1; //cur action | |
var isRunning = false; | |
ql.firstAction = function(state){ | |
s = state; | |
var action = QL.argmax(Q[s]); | |
rar = rar * radr; | |
a = action; | |
return action; | |
}; | |
ql.getAction = function(s_prime,r){ | |
Q[s][a] = (1 - alpha) * Q[s][a] + alpha * (r + gamma * d3.max(Q[s_prime])); | |
var action = QL.argmax(Q[s_prime]); | |
//Tc[s][s_prime][a] += 1.0; | |
if(Math.random() < rar) | |
action = Math.floor(Math.random() * 4); | |
rar *= radr; | |
a = action; | |
s = s_prime; | |
return action; | |
}; | |
ql.getQ = function(){ | |
return Q; | |
} | |
return ql; | |
}, | |
argmax : function(arr){ | |
var max = -Infinity; | |
var index = 0; | |
var a = arr.map(function(d){return Number(d);}); | |
for(var i = -1; ++i < a.length; ){ | |
if(a[i] > max){ | |
max = a[i]; | |
index = i; | |
} | |
} | |
return index; | |
} | |
}; | |
//End of QLearner | |
//1 go up, 2 right, 3 down 4 left | |
function move(oldPos,action){ | |
var newPos = [0,0]; | |
newPos[0] = (action == 3) ? oldPos[0] - 1 : (action == 1) ? oldPos[0] + 1: oldPos[0]; | |
newPos[1] = (action == 2) ? oldPos[1] + 1 : (action == 0) ? oldPos[1] - 1: oldPos[1]; | |
if(dat[newPos[0]] == undefined || dat[newPos[0]][newPos[1]] == undefined || dat[newPos[0]][newPos[1]] == 1) | |
return oldPos.slice(); | |
return newPos.slice(); | |
} | |
function samePos(Pos1,Pos2){ | |
return Pos1[0] == Pos2[0] && Pos2[1] == Pos1[1]; | |
} | |
function toState(pos){ | |
return pos[0] * n + pos[1]; | |
} | |
var q = QL.createNew(n*n, 4); | |
function start(iter = 50000){ | |
console.log("start"); | |
var iterations = iter || 50000; | |
//A Buffer to store last 10 Freq | |
//Only Add Freq in last 10 iterations | |
var FCircular = d3.range(n*n).map(d=>[0,0,0,0,0,0,0,0,0,0]); | |
for(var i = -1; ++i < iterations;){ | |
var currpos = startpos.slice(); | |
var steps = 0; | |
var s = toState(startpos); | |
var action = q.firstAction(s); | |
var F = d3.range(n*n).map(d=>0); | |
while(!samePos(currpos,endpos)){ | |
currpos = move(currpos,action); | |
if(samePos(currpos,endpos)) | |
reward = 1; | |
else | |
reward = -1; | |
s = toState(currpos); | |
F[toState(currpos)]++; //update F | |
action = q.getAction(s,reward); | |
steps++; | |
if(steps > 2500000) { console.log('no path'); return;} | |
} | |
FCircular.map(function(d,i){ | |
FCircular[i].push(F[i]); | |
FCircular[i].shift(); | |
}); | |
F = FCircular.map(d=>d3.sum(d)); | |
//render every 100 iterations, For performance | |
if(i % 100 == 0) postMessage(F); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment