Skip to content

Instantly share code, notes, and snippets.

@chrahunt
Last active April 21, 2016 04:19
Show Gist options
  • Save chrahunt/881595e87e3e960ad92f to your computer and use it in GitHub Desktop.
Save chrahunt/881595e87e3e960ad92f to your computer and use it in GitHub Desktop.
Example stabilizer learning bot. Tries to center self on bottom middle tile of OFM map.
// ==UserScript==
// @name TagPro Stabilizer Bot Trainer
// @description Stabilize your ball perfectly on a square.
// @version 0.1
// @include http://tagpro-maptest.koalabeast.com:*
// @include http://tangent.jukejuice.com:*
// @include http://*.newcompte.fr:*
// @require https://raw.githubusercontent.com/karpathy/reinforcejs/master/lib/rl.js
// @require https://github.com/eligrey/FileSaver.js/raw/master/FileSaver.min.js
// @require https://gist.github.com/chrahunt/4843f0258c516882eea0/raw/loopback.user.js
// @author snaps_
// @namespace http://www.reddit.com/user/snaps_
// @license MIT
// ==/UserScript==
(function(window, $) {
// This script is not allowed on public servers. Changing the header
// above to include them is pointless, as actually using the script on
// a public server will get you caught and banned.
// User-adjustable hotkeys, compared against event.keyCode.
var HOTKEY_START = 69; // "e" starts training.
var HOTKEY_STOP = 81; // "q" stops training.
// End of user-adjustable settings.
// Bind the timer functions from the window object handed to this wrapper so
// every timer below goes through the same implementations.
var setTimeout = window.setTimeout;
var clearTimeout = window.clearTimeout;
var setInterval = window.setInterval;
var clearInterval = window.clearInterval;
// Physics step-size.
var STEP = 1 / 60;
// Tolerance used by equals() when comparing distances and velocities.
var EPS = 1 / 1e4;
// Width of one map tile.
var TILE_WIDTH = 40;
// True when a and b differ by less than the EPS tolerance.
function equals(a, b) {
  var gap = Math.abs(a - b);
  return gap < EPS;
}
// Draw a random number starting at a and extending towards (but excluding) b.
function sample(a, b) {
  var width = b - a;
  return a + width * Math.random();
}
// Draw a random integer by flooring a sample taken between a and b.
// A draw that lands exactly on b is rejected and the draw is repeated.
function discreteSample(a, b) {
  var candidate = Math.floor(a + (b - a) * Math.random());
  return candidate == b ? discreteSample(a, b) : candidate;
}
// Register fn with tagpro.ready, polling with zero-delay timeouts until the
// global tagpro object has been defined.
function addToTagproReady(fn) {
  if (typeof tagpro === "undefined") {
    // Not defined yet: check again on a later tick.
    setTimeout(function retry() {
      addToTagproReady(fn);
    }, 0);
    return;
  }
  tagpro.ready(fn);
}
// Emit s as a "local:chat" event addressed to "all" on the tagpro socket.
function chat_message(s) {
  var payload = { to: "all", message: s };
  tagpro.socket.emit("local:chat", payload);
}
// Given a position object with an x coordinate, return the x coordinate of
// the center of the tile that contains it.
// The half-tile offset is derived from TILE_WIDTH (as tileCenter does)
// instead of being hard-coded, so the two helpers always agree.
function getTarget(pos) {
  var tileIndex = Math.floor(pos.x / TILE_WIDTH);
  return tileIndex * TILE_WIDTH + TILE_WIDTH / 2;
}
// Learning agent: pairs an RL.DQNAgent "brain" with the environment it
// observes and acts on.
var Agent = function(env) {
  // alpha is the only DQNAgent setting supplied here.
  this.brain = new RL.DQNAgent(env, { alpha: 0.01 });
  this.env = env;
};
// Observe the normalized state, ask the brain for an action, and apply it.
Agent.prototype.forward = function() {
  var env = this.env;
  var observation = env.normalize(env.getState());
  var choice = this.brain.act(observation);
  env.act(choice);
};
// Pass the reward for the previous action on to the brain.
Agent.prototype.backward = function(reward) {
  this.brain.learn(reward);
};
// Environment wrapper: reads the local player's horizontal position and
// velocity from tagpro and turns the agent's chosen actions into key
// presses and releases.
var Env = function() {
  // Length of the state vector ([offset to target, velocity]).
  this.numStates = 2;
  // Number of discrete actions: left, right, nothing.
  this.numActions = 3;
  // Patch the body velocity getter so each read also copies the x velocity
  // onto the matching tagpro player, keeping player.vx up to date.
  Box2D.Dynamics.b2Body.prototype.GetLinearVelocity = function() {
    var player = tagpro.players[this.player.id];
    player.vx = this.m_linearVelocity.x;
    return this.m_linearVelocity;
  };
  // Action index -> directions that should be held.
  this.actions = { "0": ["left"], "1": ["right"], "2": [] };
  // Every direction this environment controls.
  this.dirs = ["left", "right"];
  this.oppositeDir = { left: "right", right: "left" };
  // Element that synthetic keyup events are triggered on.
  this.viewport = $("#viewport");
  // Largest allowed distance from the target before an episode fails.
  this.range = 2 * TILE_WIDTH;
};
// First key code tagpro has registered for the given direction.
Env.prototype.getKeyCode = function(dir) {
  var codes = tagpro.keys[dir];
  return codes[0];
};
// Remember the x coordinate the agent should settle on.
Env.prototype.setTarget = function(target) {
  this.target = target;
};
// x coordinate of the local player, offset by half a tile.
Env.prototype.getPosition = function() {
  var me = tagpro.players[tagpro.playerId];
  return me.x + TILE_WIDTH / 2;
};
// Horizontal velocity last recorded on the local player.
Env.prototype.getVelocity = function() {
  var me = tagpro.players[tagpro.playerId];
  return me.vx;
};
// Shift and scale a raw [offset, velocity] state with fixed constants
// before it is handed to the learner.
Env.prototype.normalize = function(state) {
  var scaledOffset = (state[0] - 30) / this.range;
  var scaledVelocity = (state[1] - 0.25) / 0.5;
  return [scaledOffset, scaledVelocity];
};
// Whether tagpro reports the local player as pressing the direction.
Env.prototype.isPressing = function(dir) {
  var me = tagpro.players[tagpro.playerId];
  return me.pressing[dir];
};
// Apply the action with index n.
Env.prototype.act = function(n) {
  this.move(this.actions[n]);
};
// Release a direction by triggering a keyup event on the viewport.
Env.prototype.release = function(dir) {
  var keyup = $.Event("keyup");
  keyup.keyCode = this.getKeyCode(dir);
  this.viewport.trigger(keyup);
};
// Release every controlled direction.
Env.prototype.releaseAll = function() {
  this.move([]);
};
// Press a direction through tagpro.
Env.prototype.press = function(dir) {
  tagpro.sendKeyPress(dir);
};
// Hold exactly the directions listed in presses: press each of them
// (releasing its opposite if currently held), then release every controlled
// direction that was not requested.
Env.prototype.move = function(presses) {
  var self = this;
  presses.forEach(function(dir) {
    self.press(dir);
    var other = self.oppositeDir[dir];
    if (self.isPressing(other)) {
      self.release(other);
    }
  });
  self.dirs.forEach(function(dir) {
    var requested = presses.indexOf(dir) !== -1;
    if (!requested) {
      self.release(dir);
    }
  });
};
// Current raw state: [target minus position, velocity].
Env.prototype.getState = function() {
  var position = this.getPosition();
  var velocity = this.getVelocity();
  return [this.target - position, velocity];
};
// An episode ends when the ball rests on the target (offset and velocity
// both within tolerance of zero) or has strayed beyond the allowed range.
Env.prototype.isTerminalState = function(state) {
  var offset = state[0];
  if (equals(offset, 0) && equals(state[1], 0)) {
    return true;
  }
  return Math.abs(offset) > this.range;
};
Env.prototype.getNumStates = function() {
  return this.numStates;
};
Env.prototype.getMaxNumActions = function() {
  return this.numActions;
};
// Reward for a raw state: +10 for stopping on the target, -10 for leaving
// the range, +1 for passing over the target while still moving; otherwise a
// penalty proportional to distance, plus a velocity penalty above 0.2.
Env.prototype.getReward = function(state) {
  var offset = state[0];
  var speed = Math.abs(state[1]);
  var centered = equals(offset, 0);
  if (this.isTerminalState(state)) {
    if (centered && equals(state[1], 0)) {
      // Desired terminal state.
      return 10;
    }
    if (Math.abs(offset) > this.range) {
      // Outside of range, bad terminal state.
      return -10;
    }
  } else if (centered) {
    // On top of the center point even though it has not stopped.
    return 1;
  }
  var distancePenalty = -(Math.abs(offset) / this.range);
  if (speed > 0.2) {
    return distancePenalty + -(speed / 2.5);
  }
  return distancePenalty;
};
// One training episode. The experiment first drives the ball to a fixed
// start location, then brings it towards a hand-off location at a target
// velocity, and finally lets the agent act and learn until a terminal state
// is reached or the episode is terminated.
//
// opts.env        - environment used to read state and press/release keys.
// opts.agent      - agent exposing forward() and backward(reward).
// opts.start      - side to start from, "left" or "right".
// opts.target_loc - location at which control is handed to the agent.
// opts.target_v   - velocity to aim for before the hand-off.
var Experiment = function(opts) {
  this.env = opts.env;
  this.agent = opts.agent;
  this.start = opts.start; // left/right
  this.target_loc = opts.target_loc; // Location to initiate the agent.
  this.target_v = opts.target_v;
  // Delay handed to setTimeout between agent actions.
  // NOTE(review): STEP looks like seconds per physics step while setTimeout
  // takes milliseconds, so this is a sub-millisecond delay -- confirm intent.
  this.interval = STEP * 10; // step size.
  // Starting locations on OFM map.
  this.start_locs = {
    left: 59,
    right: 941
  };
  // Handle of the positioning interval currently running, if any. Kept on
  // the instance so terminate() can cancel it.
  this.positioning = null;
  // True only while the agent is in control of the ball.
  this.active = false;
};
// Cancel the positioning interval started by run() or _start(), if any.
Experiment.prototype._clearPositioning = function() {
  if (this.positioning !== null) {
    clearInterval(this.positioning);
    this.positioning = null;
  }
};
// Run experiment: head for the start location, then begin the approach.
Experiment.prototype.run = function() {
  // Never leave an earlier, untracked interval behind.
  this._clearPositioning();
  // Get into start position.
  this.env.press(this.start);
  this.positioning = setInterval(function() {
    if (equals(this.env.getPosition(), this.start_locs[this.start])) {
      this._clearPositioning();
      this.env.releaseAll();
      // Initiate experiment.
      this._start();
    }
  }.bind(this), 20);
};
// Get into state for experiment and hand off control to agent.
Experiment.prototype._start = function() {
  chat_message("Starting episode.");
  // Interval to ensure velocity and position constraints.
  this.positioning = setInterval(function() {
    var pos = this.env.getPosition();
    var v = this.env.getVelocity();
    // Transfer control to agent when close.
    if (Math.abs(this.target_loc - pos) < 5) {
      this._clearPositioning();
      this.transferToAgent();
      return;
    }
    if (Math.abs(v - this.target_v) < 0.05) {
      // Close enough to the target velocity: coast.
      this.env.releaseAll();
    } else if (v < this.target_v) {
      // Too slow: push towards the hand-off location.
      if (pos < this.target_loc) {
        this.env.press("right");
      } else if (pos > this.target_loc) {
        this.env.press("left");
      }
    }
  }.bind(this), 20);
};
// Set function to be called on each learning step of the agent.
// It receives (reward, state, agent, env).
Experiment.prototype.onStep = function(fn) {
  this.step_fn = fn;
};
// Set function to be called when actual experiment starts.
Experiment.prototype.onStart = function(fn) {
  this.start_fn = fn;
};
// Set function to be called when experiment has ended.
Experiment.prototype.onComplete = function(fn) {
  this.complete_fn = fn;
};
// Terminate experiment prematurely. Cancels the positioning interval and the
// action timer, releases all keys and restores the original world update.
// The completion callback is not invoked.
Experiment.prototype.terminate = function() {
  // Stop any reward callback that is already queued from learning or
  // completing the episode after this point.
  this.active = false;
  this._clearPositioning();
  clearTimeout(this.update);
  this.env.releaseAll();
  if (this.stdUpdate) {
    tagpro.world.update = this.stdUpdate;
    window.override = false;
  }
  chat_message("Episode terminated.");
};
// Reset necessary state after a terminal state and notify the completion
// callback. The `forced` argument is accepted but not used.
Experiment.prototype.complete = function(forced) {
  this.active = false;
  clearTimeout(this.update);
  this.env.releaseAll();
  chat_message("Episode completed.");
  if (this.complete_fn) {
    this.complete_fn();
  }
};
// Transfer control to agent and start learning cycle.
Experiment.prototype.transferToAgent = function() {
  chat_message("Transferring control to agent.");
  // Whether the agent is anticipating a reward.
  var giveReward = false;
  this.active = true;
  // Wrap the world update so the agent is rewarded right after the physics
  // step that took its action into account.
  this.stdUpdate = tagpro.world.update;
  var update = function rewardUpdate() {
    if (!this.active || !giveReward) {
      return;
    }
    giveReward = false;
    var state = this.env.getState();
    var reward = this.env.getReward(state);
    this.agent.backward(reward);
    if (this.step_fn) {
      this.step_fn(reward, state, this.agent, this.env);
    }
    // Check if terminal state and quit if so.
    if (this.env.isTerminalState(state)) {
      this.complete();
      tagpro.world.update = this.stdUpdate;
      window.override = false;
    }
  }.bind(this);
  var stdUpdate = this.stdUpdate;
  var wrappedUpdate = function updateWithReward() {
    stdUpdate.apply(tagpro.world, arguments);
    setTimeout(update);
  };
  // Still exposed on window as before, but installed through the local
  // reference rather than a bare global lookup.
  window.newUpdate = wrappedUpdate;
  tagpro.world.update = wrappedUpdate;
  window.override = true;
  var update_fn = function actionInterval() {
    // Skip if reward hasn't been sent to agent yet.
    if (!giveReward) {
      giveReward = true;
      this.agent.forward();
    } else {
      console.log("Skipping.");
    }
    this.update = setTimeout(update_fn, this.interval);
  }.bind(this);
  // Call start callback.
  if (this.start_fn) {
    this.start_fn();
  }
  this.update = setTimeout(update_fn, this.interval);
};
// Return [start, end] coordinates spanned by the tile with index x.
function tileRange(x) {
  var start = x * TILE_WIDTH;
  var end = (x + 1) * TILE_WIDTH;
  return [start, end];
}
// Return the coordinate of the middle of the tile with index x.
function tileCenter(x) {
  var half = TILE_WIDTH / 2;
  return x * TILE_WIDTH + half;
}
// Drawing helper: owns one PIXI.Graphics object that is attached to the
// game's background layer and used for every shape drawn below.
var Draw = function() {
  this.graphics = new PIXI.Graphics();
  var self = this;
  this._waitForBackground(function() {
    tagpro.renderer.layers.background.addChild(self.graphics);
  });
  // Fill color and opacity used by highlightTile.
  this.tile_color = 0xdddddd;
  this.tile_opacity = 0.5;
};
// Call fn once the background layer has exactly one child, checking again
// every 100 ms until then.
Draw.prototype._waitForBackground = function(fn) {
  var background = tagpro.renderer.layers.background;
  if (background.children.length === 1) {
    fn();
    return;
  }
  var self = this;
  setTimeout(function() {
    self._waitForBackground(fn);
  }, 100);
};
// Fill the tile at tile coordinates (x, y).
Draw.prototype.highlightTile = function(x, y) {
  var g = this.graphics;
  var leftEdge = x * TILE_WIDTH;
  var topEdge = y * TILE_WIDTH;
  g.beginFill(this.tile_color, this.tile_opacity);
  g.drawRect(leftEdge, topEdge, TILE_WIDTH, TILE_WIDTH);
  g.endFill();
};
// Draw a 2px red line from (x1, y1) to (x2, y2), then reset the line style.
Draw.prototype.line = function(x1, y1, x2, y2) {
  var g = this.graphics;
  g.lineStyle(2, 0xff1111, 1);
  g.moveTo(x1, y1);
  g.lineTo(x2, y2);
  g.lineStyle(0, 0, 0);
};
// Mark a point as a circle of radius 2.
Draw.prototype.point = function(x, y) {
  this.circle(x, y, 2);
};
// Outline a circle of radius r centered on (x, y) with a 2px red line, then
// reset the line style.
Draw.prototype.circle = function(x, y, r) {
  var g = this.graphics;
  g.lineStyle(2, 0xff1111, 1);
  g.drawCircle(x, y, r);
  g.lineStyle(0, 0, 0);
};
// Clear everything drawn so far.
Draw.prototype.reset = function() {
  this.graphics.clear();
};
// On-screen readout: keeps a set of labelled text lines in the game UI layer
// and refreshes them with the latest values on every tagpro UI update.
var Info = function() {
  // Intended minimum number of ms between interface updates. It is not read
  // anywhere in this script.
  this.update_interval = 200;
  // Position of the next line of text; y advances as lines are added.
  this.x_offset = 20;
  this.y_offset = 100;
  // Label of each line, keyed by line name.
  this.lines = {};
  // Text object of each line, keyed by line name.
  this.texts = {};
  // Latest value of each line, keyed by line name.
  this.vals = {};
  // Time each line was last updated (initialised to 0 by addLine).
  this.last_update = {};
  this._inject();
};
// Wrap tagpro.ui.update so that, after the original runs, every line with a
// stored value is rewritten as "<label>: <value>".
Info.prototype._inject = function() {
  var stdUpdate = tagpro.ui.update;
  tagpro.ui.update = function() {
    stdUpdate.apply(tagpro.ui, arguments);
    for (var name in this.vals) {
      var text = this.texts[name];
      // A value may be supplied for a name that has no line; skip it rather
      // than throwing inside the game's UI update on every frame.
      if (!text) {
        continue;
      }
      text.setText(this.lines[name] + ": " + this.vals[name]);
    }
  }.bind(this);
};
// Add a line to the display.
// name is the key later used with update(); label is the text shown before
// the value. Each new line is placed 20 units below the previous one.
Info.prototype.addLine = function(name, label) {
  this.lines[name] = label;
  this.last_update[name] = 0;
  var text = tagpro.renderer.prettyText('');
  text.x = this.x_offset;
  text.y = this.y_offset;
  this.y_offset += 20;
  this.texts[name] = text;
  tagpro.renderer.layers.ui.addChild(text);
};
// Update information display.
// vals is an object whose keys are line names and whose values are the new
// values to show; they are drawn on the next UI update.
Info.prototype.update = function(vals) {
  for (var name in vals) {
    this.vals[name] = vals[name];
  }
};
// Long-term training statistics.
// display is an object with addLine(name, label) and update(vals); three
// lines are registered on it when the tracker is created.
var Stats = function(display) {
  // Number of experiments completed.
  this.experiments = 0;
  // History of recorded smoothed reward values.
  this.smooth_rewards = [];
  // Steps between saving reward values.
  this.steps = 200;
  // Counter for reward value steps.
  this.step = 0;
  // Maximum number of previous rewards to keep.
  this.max_rewards = 1000;
  // Total time spent in experiments, in ms.
  this.time = 0;
  this.display = display;
  display.addLine('smooth_reward', 'Smooth Reward');
  display.addLine('experiment', 'Episodes');
  display.addLine('time_trained', 'Total Time Trained (ms)');
};
// Call when starting an experiment.
Stats.prototype.startExp = function() {
  this.last_start = Date.now();
};
// Call when experiment ended: adds the elapsed time, counts the experiment
// and refreshes both lines on the display.
Stats.prototype.endExp = function() {
  this.time += Date.now() - this.last_start;
  this.experiments++;
  this.display.update({
    time_trained: this.time,
    experiment: this.experiments
  });
};
// Fold a reward into the exponentially smoothed reward and, every
// `this.steps` calls, show it and append it to the bounded history.
Stats.prototype.addReward = function(reward) {
  this.step++;
  // Seed the average with the first reward. Only an unset or NaN average is
  // re-seeded; a legitimate average of exactly 0 must be kept.
  var current = this.smooth_reward;
  if (typeof current !== "number" || isNaN(current)) {
    this.smooth_reward = reward;
  }
  this.smooth_reward = 0.999 * this.smooth_reward + 0.001 * reward;
  if (this.step % this.steps === 0) {
    this.step = 0;
    this.display.update({
      smooth_reward: this.smooth_reward.toFixed(3)
    });
    this.smooth_rewards.push(this.smooth_reward);
    // Trim after appending so the history never exceeds max_rewards.
    while (this.smooth_rewards.length > this.max_rewards) {
      this.smooth_rewards.shift();
    }
  }
};
// Entry point: once tagpro is ready, build the drawing helper, environment,
// agent, readout and statistics, then wire up the hotkeys and socket events
// that start and stop training.
addToTagproReady(function() {
  var drawing = new Draw();
  var env = new Env();
  var agent = new Agent(env);
  var info = new Info();
  // Lines for experiment information display, in display order.
  [
    ['error', 'TD Error'],
    ['reward', 'Reward'],
    ['exp', 'Saved Experiences'],
    ['distance', 'Distance'],
    ['velocity', 'Velocity']
  ].forEach(function(line) {
    info.addLine(line[0], line[1]);
  });
  var stats = new Stats(info);
  // Range of possible ball velocity at hand-off.
  var velocityRange = [0.5, 1.5];
  // Tile row and pixel y coordinate used for drawing at the bottom of the map.
  var bottomTileRow = 17;
  var bottomYCoord = 700;
  // Holds active experiment.
  var exp;
  window.override = false;
  // Display ideas noted here originally: velocity, center location, target
  // location, release location, bounds of area.
  // Make accessible outside script.
  window.myAgent = agent;

  // Per-step callback: record the reward and refresh the readout.
  function stepUpdate(reward, state, agent, env) {
    stats.addReward(reward);
    var readout = {
      reward: reward.toFixed(3),
      error: agent.brain.tderror.toFixed(2),
      exp: agent.brain.expi,
      distance: state[0].toFixed(3),
      velocity: state[1].toFixed(3)
    };
    info.update(readout);
  }

  // Sets up and runs an experiment, then chains the next one on completion.
  function setupAndRun() {
    drawing.reset();
    // Pick a random tile in the middle range and highlight it.
    var tile = discreteSample(10, 16);
    drawing.highlightTile(tile, bottomTileRow);
    var center = tileCenter(tile);
    env.setTarget(center);
    // Mark the tile center and the allowed bounds around it.
    drawing.point(center, bottomYCoord);
    drawing.circle(center, bottomYCoord, env.range);
    // Random hand-off location within the tile, and random hand-off velocity.
    var bounds = tileRange(tile);
    var handoff = sample(bounds[0], bounds[1]);
    var handoffVelocity = sample(velocityRange[0], velocityRange[1]);
    exp = new Experiment({
      env: env,
      agent: agent,
      start: "left",
      target_loc: handoff, // Location to initiate agent.
      target_v: handoffVelocity
    });
    // Update display and statistics.
    exp.onStart(function() {
      stats.startExp();
    });
    exp.onStep(stepUpdate);
    exp.onComplete(function() {
      stats.endExp();
      setupAndRun();
    });
    exp.run();
  }

  window.agentState = {};
  agentState.running = false;

  // Start training unless it is already running or the game is in pre-game.
  function init() {
    var preGame = tagpro.state == 3;
    if (agentState.running || preGame) {
      return;
    }
    agentState.running = true;
    setupAndRun();
  }

  // Stop training: terminate the active experiment and release all buttons.
  function reset() {
    if (agentState.running) {
      agentState.running = false;
      exp.terminate();
      env.releaseAll();
    }
  }

  // Key listeners.
  document.addEventListener("keydown", function(e) {
    if (!tagpro.disableControls && e.keyCode === HOTKEY_START) {
      init();
    }
  });
  document.addEventListener("keyup", function(e) {
    if (!tagpro.disableControls && e.keyCode === HOTKEY_STOP) {
      reset();
    }
  });

  // Handle death: stop when an update reports the local player as dead.
  tagpro.socket.on("p", function(msg) {
    var updates = msg.u || msg;
    var died = updates.some(function(update) {
      return update.id === tagpro.playerId && update.dead;
    });
    if (died) {
      reset();
    }
  });

  // Handle end of game.
  tagpro.socket.on("end", function() {
    reset();
  });

  // Save the agent's brain as a JSON file. Not currently hooked up; see the
  // disabled listener below.
  function downloadData() {
    var serialized = JSON.stringify(agent.brain.toJSON());
    var blob = new Blob([serialized], { type: "application/json" });
    saveAs(blob, "data-" + Date.now() + ".json");
  }
  //window.addEventListener('beforeunload', downloadData);
});
})(unsafeWindow, $); // Using page jQuery.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment