Skip to content

Instantly share code, notes, and snippets.

@spicydog
Last active May 19, 2016 17:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save spicydog/5014ed51e43b1b46a1b0 to your computer and use it in GitHub Desktop.
Save spicydog/5014ed51e43b1b46a1b0 to your computer and use it in GitHub Desktop.
<?php
// Grid-world utility/policy estimation on a 3x3 board.
//
// The agent starts at cell [1,0]. On every step it computes, for each
// intended action, the expected utility of the cells that action may lead to,
// picks the best action, stores "reward + expected utility" as the utility of
// the cell it is standing on, records the chosen action in the policy grid and
// then moves one cell in the chosen direction. Reaching cell [1,1] or [1,2]
// sends the agent back to the start cell.

// Human-readable names and arrow glyphs for the four action codes.
$fullActions = ['U'=>'Up', 'L'=>'Left', 'R'=>'Right', 'D'=>'Down'];
$fullMoves = ['U'=>'^', 'L'=>'<', 'R'=>'>', 'D'=>'v'];

// Reward of each cell, indexed [row][column].
$rewards = [
    [-0.04, -0.04, -0.04],
    [-0.04, -1, 1],
    [-0.04, -0.04, -0.04],
];

// $actions[intended][actual] is the weight with which choosing `intended`
// is evaluated as if the agent moved in direction `actual`.
$actions = [
    'U'=>['U'=>0.8,'L'=>0.1,'R'=>0.1,'D'=>0],
    'L'=>['U'=>0.1,'L'=>0.9,'R'=>0.0,'D'=>0.0],
    'R'=>['U'=>0.1,'L'=>0.0,'R'=>0.8,'D'=>0.1],
    'D'=>['U'=>0.0,'L'=>0.0,'R'=>0.1,'D'=>0.9],
];

// Current utility estimates; cells [1,1] and [1,2] start at -1 and 1.
$utilities = [
    [0, 0, 0],
    [0, -1, 1],
    [0, 0, 0],
];

// Best known action glyph per cell; '.' means "not decided yet".
$policy = [
    ['.', '.', '.'],
    ['.', '.', '.'],
    ['.', '.', '.'],
];

$start = [1,0];
$p = $start;

for ($i = 0; $i < 500; $i++) {
    printf("Step: %d\n", $i+1);

    // Expected utility of every intended action from the current cell.
    $results = [];
    foreach ($actions as $action => $props) {
        $results[$action] = 0;
        foreach ($props as $move => $prop) {
            $results[$action] += $prop * getU($utilities, $p, $move);
        }
        printf("%s = %.4f\t", $action, $results[$action]);
    }
    echo "\n";

    // Pick the action with the highest expected utility. The strict ">" keeps
    // the first action on ties, so the choice does not depend on whether the
    // sort implementation is stable.
    $bestMove = null;
    $maxMove = null;
    foreach ($results as $action => $value) {
        if ($bestMove === null || $value > $maxMove) {
            $bestMove = $action;
            $maxMove = $value;
        }
    }

    $newP = move($rewards, $p, $bestMove);
    $reward = $rewards[$p[0]][$p[1]];

    // Store the new estimate on the CURRENT cell: this is the cell the value
    // was computed for and the one the lines printed below report it under.
    // (Writing it to $newP would also overwrite the fixed -1/+1 cells.)
    $utilities[$p[0]][$p[1]] = $reward + $maxMove;

    printf("U(%d,%d)\t= %.4f + 1.0 x %.4f\n", $p[0]+1,$p[1]+1, $reward, $maxMove);
    printf("U(%d,%d)\t= %.4f\n", $p[0]+1,$p[1]+1, $reward + $maxMove);
    printf("π(%d,%d)\t= %s\n", $p[0]+1,$p[1]+1, $fullActions[$bestMove]);
    $policy[$p[0]][$p[1]] = $fullMoves[$bestMove];

    // Advance the agent; the utilities dump marks its new position with '*'.
    $p = $newP;
    echo "\nUtilities:\n";
    printMatrix($utilities, $p);

    // Cells [1,1] and [1,2] end the episode: restart from the start cell.
    if (($p[0]==1 && $p[1]==1) || ($p[0]==1 && $p[1]==2)) {
        $p = $start;
    }
}

echo "Policy:\n";
printStrMatrix($policy);
/**
 * Print a numeric grid, one row per line, each value with three decimals
 * followed by a tab. The cell whose [row, column] keys equal $p gets a '*'
 * suffix. A blank line is printed after the grid.
 *
 * @param array $m Grid of numbers indexed [row][column].
 * @param array $p [row, column] of the cell to mark; the default marks none.
 */
function printMatrix($m, $p=[-1,-1]) {
    foreach ($m as $rowKey => $row) {
        $line = '';
        foreach ($row as $colKey => $value) {
            $isMarked = ($p[0] == $rowKey && $p[1] == $colKey);
            $line .= sprintf("%.3f%s\t", $value, $isMarked ? '*' : '');
        }
        echo $line, "\n";
    }
    echo "\n";
}
/**
 * Print a grid of strings, one row per line, each cell followed by a tab.
 * The cell whose [row, column] keys equal $p gets a '*' suffix. A blank line
 * is printed after the grid.
 *
 * @param array $m Grid of strings indexed [row][column].
 * @param array $p [row, column] of the cell to mark; the default marks none.
 */
function printStrMatrix($m, $p=[-1,-1]) {
    foreach ($m as $rowKey => $row) {
        $line = '';
        foreach ($row as $colKey => $text) {
            $isMarked = ($p[0] == $rowKey && $p[1] == $colKey);
            $line .= sprintf("%s%s\t", $text, $isMarked ? '*' : '');
        }
        echo $line, "\n";
    }
    echo "\n";
}
/**
 * Return the value stored in the cell reached by taking one step in
 * direction $move from position $now; the step itself is resolved by move().
 *
 * @param array  $matrix Grid of values indexed [row][column].
 * @param array  $now    Current [row, column] position.
 * @param string $move   Direction code passed through to move().
 * @return mixed The grid value at the resulting position.
 */
function getU($matrix, $now, $move) {
    $target = move($matrix, $now, $move);
    $row = $target[0];
    $col = $target[1];
    return $matrix[$row][$col];
}
/**
 * Take one step from $now in direction $move and clamp the result to the
 * bounds of $matrix, so a step into a wall leaves the position unchanged.
 *
 * Rows are bounded by the number of rows and columns by the width of the
 * first row, so rectangular (non-square) grids are handled correctly.
 *
 * @param array  $matrix Grid indexed [row][column]; only its size is used.
 * @param array  $now    Current [row, column] position.
 * @param string $move   'U', 'L', 'R' or 'D'; any other value means no step.
 * @return array The new [row, column] position.
 */
function move($matrix, $now, $move) {
    if ($move === 'U') {
        $now[0] -= 1;
    } elseif ($move === 'L') {
        $now[1] -= 1;
    } elseif ($move === 'R') {
        $now[1] += 1;
    } elseif ($move === 'D') {
        $now[0] += 1;
    }

    $lastRow = count($matrix) - 1;

    // Column bound comes from the row width, not the row count. If the first
    // row is not an array, fall back to the row count.
    $firstRow = $lastRow >= 0 ? reset($matrix) : null;
    $lastCol = is_array($firstRow) ? count($firstRow) - 1 : $lastRow;

    // Clamp low first, then high.
    $now[0] = min(max($now[0], 0), $lastRow);
    $now[1] = min(max($now[1], 0), $lastCol);

    return $now;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment