Skip to content

Instantly share code, notes, and snippets.

@demircancelebi
Created February 4, 2020 20:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save demircancelebi/649607d0c9c9d485733f5d59134fe990 to your computer and use it in GitHub Desktop.
Save demircancelebi/649607d0c9c9d485733f5d59134fe990 to your computer and use it in GitHub Desktop.
K-Armed Bandit
// Experiment configuration for the k-armed bandit simulation.
const ARMS = 10; // number of arms
const STEPS = 100000; // pulls performed for each epsilon value
const epsilons = [0, 0.01, 0.1]; // exploration probabilities to try

// Each arm's base payout, drawn uniformly from [-3, 3).
const means = Array.from({ length: ARMS }, () => Math.random() * 6 - 3);

// Per-arm reward totals and pull counts, all starting at zero.
// Declared with `let` because the experiment loop replaces them between runs.
let rewards = new Array(ARMS).fill(0);
let pulls = new Array(ARMS).fill(0);
/**
 * Sample one reward from the given arm.
 *
 * @param {number} arm - Index into the file-level `means` array.
 * @returns {number} The arm's mean shifted by uniform noise from [-0.5, 0.5).
 */
function pullArm(arm) {
  // Same addition order as before: add the raw random draw first, then centre it.
  const shifted = means[arm] + Math.random();
  return shifted - 0.5;
}
// Run one epsilon-greedy experiment per exploration rate.
for (const eps of epsilons) {
  for (let step = 0; step < STEPS; step++) {
    // With probability eps pick a uniformly random arm; otherwise take the
    // arm that chooseBest() currently ranks highest.
    const explore = Math.random() < eps;
    const arm = explore ? Math.floor(Math.random() * ARMS) : chooseBest();

    const payout = pullArm(arm);
    rewards[arm] += payout;
    pulls[arm] += 1;
  }

  // Report the exploration rate followed by the total reward over all arms.
  console.log(eps);
  console.log(rewards.reduce((total, r) => total + r));
  console.log("----");

  // Zero the per-arm statistics so the next epsilon starts fresh.
  rewards = new Array(ARMS).fill(0);
  pulls = new Array(ARMS).fill(0);
}

// Print the arm means used for every run.
console.log(means);
/**
 * Pick the arm with the highest average reward observed so far.
 *
 * Reads the file-level `rewards` (per-arm reward totals) and `pulls` (per-arm
 * pull counts). An arm that has never been pulled is given an estimate of 0.
 * Ties go to the lowest index, because only a strictly greater estimate
 * replaces the current best.
 *
 * @returns {number} Index of the best arm, or -1 when no arm has an estimate
 *   greater than -Infinity (for example when `rewards` is empty).
 */
function chooseBest() {
  let best = -1;
  // Start from -Infinity instead of a large negative constant, so an arm is
  // still selected when every average is extremely negative.
  let bestResult = -Infinity;
  rewards.forEach((reward, i) => {
    // Avoid dividing by zero for arms with no pulls yet.
    const result = pulls[i] === 0 ? 0 : reward / pulls[i];
    if (result > bestResult) {
      bestResult = result;
      best = i;
    }
  });
  return best;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment