Skip to content

Instantly share code, notes, and snippets.

@adrianseeley
Last active December 29, 2015 04:49
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save adrianseeley/7617100 to your computer and use it in GitHub Desktop.
hmm.js
var test =
[
['label0', 100, 200, 300, 400, 500],
['label0', 200, 100, 400, 300, 500],
['label1', 100, 100, 100, 100, 100],
['label1', 100, 100, 100, 100, 100],
['label2', 0, 0, 0, 0, 0],
['label2', 0, 1, 1, 0, 0],
];
var real = require('fs').readFileSync('train.csv', 'utf8');
real = real.split('\n');
real.splice(0, 1); // lop off headers
real.splice(real.length - 1, 1); // lop off empty trailer
real = real.splice(0, 1000); // shorten train
for (var r = 0; r < real.length; r++) {
real[r] = real[r].split(',')
for (var v = 0; v < real[r].length; v++) {
real[r][v] = parseInt(real[r][v]);
}
}
var real_test = require('fs').readFileSync('test.csv', 'utf8');
real_test = real_test.split('\n');
real_test.splice(0, 1); // lop off headers
real_test.splice(real_test.length - 1, 1); // lop off empty trailer
for (var r = 0; r < real_test.length; r++) {
real_test[r] = real_test[r].split(',')
for (var v = 0; v < real_test[r].length; v++) {
real_test[r][v] = parseInt(real_test[r][v]);
}
}
function train (set, exit_tolerance) {
// set vector size to first vector length
var vector_size = set[0].length - 1; // less one for label
// collect cases by labels
var cases = {};
for (var s = 0; s < set.length; s++) {
// remove the label from this case
var label = set[s].splice(0, 1)[0];
// if we dont already have this label create it
if (!cases.hasOwnProperty(label)) cases[label] = [];
// add this case to the label
cases[label].push(set[s]);
}
// create the initial program and rating
var t = new Date().getTime();
var program = '';
var program_rating = rate_program_on_cases(program, cases);
console.log(new Date().getTime() - t);
// track fails for logging
var fails = 0;
// genetic loop while program rating is less than exit tolerance
while (program_rating > exit_tolerance) {
// create and rate mutant program
var mutant_program = create_mutant_program(program, vector_size);
var mutant_program_rating = rate_program_on_cases(mutant_program, cases);
// if mutant program is better than original program
if (mutant_program_rating < program_rating) {
// log success
console.log('(' + program + ')' + program_rating + '->(' + mutant_program + ')' + mutant_program_rating);
// replace program and rating with mutants
program = mutant_program;
program_rating = mutant_program_rating;
} else {
// log failure
fails++;
if (fails == 100000) {
console.log('(' + program + ')' + program_rating + '-(' + mutant_program + ')' + mutant_program_rating);
fails = 0;
}
}
}
// exit tolerance reached!
// create scores per label
var scores_per_label = {};
// loop through each label in cases
for (var label in cases) {
// add label to scores per label
scores_per_label[label] = [];
// loop through each case in this label
for (var c = 0; c < cases[label].length; c++) {
// get the score for this case, add it to the scores for this label
scores_per_label[label].push(run_program_on_case(program, cases[label][c]));
}
}
// return scores per label and program
return [scores_per_label, program, cases];
};
function run_multi (program, multi_set, score_set) {
// create an array for answers
var answers = [];
// loop through each set in the multi_set
for (var m = 0; m < multi_set.length; m++) {
// calculate this set's answer
answers.push(run_one(program, multi_set[m], score_set));
}
// return answers
return answers;
};
function run_one (program, set, score_set) {
// first we run the program on our set to label
var score = run_program_on_case(program, set);
// next we find KNN neighbours
var confidence = [];
// loop through all the labels in our score set
for (var label in score_set) {
// loop through all score_set cases for this label
for (var c = 0; c < score_set[label].length; c++) {
// find the KNN distance and add it to the confidence
confidence.push([label, Math.abs(score_set[label][c] - score)]);
}
}
// sort the confidence by nearest KNN
confidence.sort(function (a, b) {
return a[1] - b[1];
});
// knn number (median over knn)
var knn = 10;
var occ = {};
// loop through top knn confidence to find most occuring (median)
for (var c = 0; c < knn && c < confidence.length; c++) {
// if we dont already have this label in occurences make it
if (!occ.hasOwnProperty(confidence[c][0])) occ[confidence[c][0]] = 0;
// increment label occurence
occ[confidence[c][0]]++;
// NOTE
// this incrementation could be weighted mariokart style
// maybe a golden ratio? fib?
}
// find highest occurence
var highest = null;
var at = null;
// loop through all labels in occurences
for (var label in occ) {
// if we dont yet have a highest or if this label is greater than the highest
if (highest == null || occ[label] > at) {
// update the highest
highest = label;
at = occ[label];
}
}
// return the knn highest
return highest;
};
function create_mutant_program (program, vector_size) {
// create a copy to be the mutant
var mutant = '' + program;
// mutations made
var mutations_made = 0;
// make mutation passes till enough stick
while (mutant == program || mutations_made < 2) {
// randomly decide to add or remove
if (mutant.length == 0 || Math.random() > 0.5) {
// add gene
// randomly choose a simple gene or a numbered gene
if (Math.random() > 0.5) {
// simple gene
// randomly choose a simple gene
var simple_genes = 'ajklmnopq';
var gene = simple_genes[Math.floor(Math.random() * simple_genes.length)];
// inject gene at end
mutant += gene;
// increment mutation counter
mutations_made++;
} else {
// numbered gene
// randomly choose a numbered gene
var numbered_genes = 'bcdefghi';
var gene = numbered_genes[Math.floor(Math.random() * numbered_genes.length)];
// randomly choose a vector x
var x = '' + Math.floor(Math.random() * vector_size);
// inject gene at end with vector x
mutant += gene + x;
// increment mutation counter
mutations_made++;
}
} else {
// remove gene
// choose a target zone
var target = Math.floor(Math.random() * mutant.length);
// if our target zone is a number retry mutation
if ('0123456789'.indexOf(mutant[target]) != -1) continue;
// we hit a letter, find the end of this command
var target_end = target;
// keep pushing the target end until it reaches the end of command
do {
// push target end further
target_end++;
// until we reach over the length (command at end of program)
// and until the target end is not on a number
} while (target_end < mutant.length && '0123456789'.indexOf(mutant[target_end]) != -1);
// remove target to target end
mutant = mutant.substr(0, target) + mutant.substr(target, target_end);
// increment mutation counter
mutations_made++;
}
}
// return the new mutant
return mutant;
};
function rate_program_on_cases (program, cases) {
// create score per label
var scores_per_label = {};
// loop through each label in cases
for (var label in cases) {
// add label to scores per label
scores_per_label[label] = [];
// loop through each case in this label
for (var c = 0; c < cases[label].length; c++) {
// get the score for this case, add it to the scores for this label
scores_per_label[label].push(run_program_on_case(program, cases[label][c]));
}
}
// create a rating
var rating = 0;
// a rating represents how close each alike case is,
// and how far apart each different case is
// sum of distance to alike cases / sum of distance to different case
// create register for ratios
var ratios = [];
// loop through each label left
for (var label_l in scores_per_label) {
// loop through each case left
for (var case_l = 0; case_l < scores_per_label[label_l].length; case_l++) {
// create registers for sums
var sum_alike = 0;
var sum_different = 0;
// loop through each label right
for (var label_r in scores_per_label) {
// loop through each case right
for (var case_r = 0; case_r < scores_per_label[label_r].length; case_r++) {
// calculate distance
var distance = Math.abs(scores_per_label[label_r][case_r] - scores_per_label[label_l][case_l]);
// if these cases have the same label
if (label_l == label_r) {
// add the distance to alike
sum_alike += distance;
// otherwise they have different labels
} else {
// add the distance to different
sum_different += distance;
}
}
}
// calculate and push ratio
ratios.push(sum_alike / sum_different);
// check for NaN replace with inf
if (isNaN(ratios[ratios.length - 1])) ratios[ratios.length - 1] = Infinity;
}
}
// average ratios
var ratio_sum = 0;
for (var r = 0; r < ratios.length; r++) {
ratio_sum += ratios[r];
}
ratio_sum /= ratios.length;
return ratio_sum;
};
function run_program_on_case (program, set) {
// copy the program to run it
var runner = program + '';
// create the register and the final score
var register = 0;
var score = 0;
// walk through program one step at time
while (runner.length > 0) {
//console.log('walk with runner(' + runner + ')');
// splice off command at head
var command = runner.substr(0, 1);
runner = runner.substr(1);
//console.log('walk after command(' + runner + ')');
// if there are numerals (vector X reference) trailling command
// then grab vector x
var vector_x = '';
// while we have a number at the head
while (runner.length > 0 && '0123456789'.indexOf(runner[0]) != -1) {
// append numeral to vector x and splice from head
vector_x += runner.substr(0, 1);
runner = runner.substr(1);
}
//console.log('walk after num(' + runner + ')');
// parse our numeral if we have one
if (vector_x.length > 0) vector_x = parseInt(vector_x);
// switch based on command
switch (command) {
case 'a': // register = 0
register = 0;
break;
case 'b': // register = register + vector x
register = register + set[vector_x];
break;
case 'c': // register = register - vector x
register = register - set[vector_x];
break;
case 'd': // register = vector x - register
register = set[vector_x] - register;
break;
case 'e': // register = register * vector x
register = register * set[vector_x];
break;
case 'f': // register = register / vector x
register = register / set[vector_x];
break;
case 'g': // register = vector x / register
register = set[vector_x] / register;
break;
case 'h': // register = register % vector x
register = register % set[vector_x];
break;
case 'i': // register = vector x % register
register = set[vector_x] % register;
break;
case 'j': // score = score + register
score = score + register;
break;
case 'k': // score = score - register
score = score - register;
break;
case 'l': // score = register - score
score = register - score;
break;
case 'm': // score = score * register
score = score * register;
break;
case 'n': // score = score / register
score = score / register;
break;
case 'o': // score = register / score
score = register / score;
break;
case 'p': // score = score % register
score = score % register;
break;
case 'q': // score = register % score
score = register % score;
break;
}
//console.log('command(' + command + '),vector_x(' + vector_x + '),set[vector_x](' + set[vector_x] + '),reg(' + register + '),score(' + score + ')');
}
//console.log('ran(' + program + '),set(' + set +'),score(' + score + ')');
// return final score
return score;
};
var score_set_and_prog = train(real, 0.08);
// test on a trainer
//console.log(run_one(score_set_and_prog[1], score_set_and_prog[2]['0'][0], score_set_and_prog[0]));
// run real test, save to readFileSync
console.log('running real test...');
var answers = run_multi(score_set_and_prog[1], real_test, score_set_and_prog[0]);
// number answers
for (var a = 0; a < answers.length; a++) {
answers[a] = (a + 1) + ',' + answers[a];
}
// seperate by newlines
answers = answers.join('\n');
// prepend header
answers = 'ImageId,Label\n' + answers;
require('fs').writeFileSync('answer' + new Date().getTime() + '.csv', answers, 'utf8');
console.log('done!');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment