Skip to content

Instantly share code, notes, and snippets.

@hitsujiwool
Created July 15, 2012 04:33
Show Gist options
  • Save hitsujiwool/3115005 to your computer and use it in GitHub Desktop.
Save hitsujiwool/3115005 to your computer and use it in GitHub Desktop.
IBM Model 1
/*
* IBM Model 1
*/
(function(exports) {
/**
 * Train an IBM Model 1 lexical translation table with the EM algorithm.
 *
 * @param {Array.<Array.<string>>} sentencePairs Parallel corpus. Each entry is
 *   a two-element array [foreignSentence, englishSentence]; every sentence is
 *   split into words on single space characters.
 * @param {number} [iteration] Number of EM iterations. Any falsy value
 *   (omitted, 0, NaN) falls back to 100.
 * @returns {Function} Accessor t(f, e) returning the learned score for the
 *   word pair (0 for pairs that were never stored). For each foreign word f
 *   the scores over all English words sum to 1, up to rounding. Calling
 *   t(f, e, val) overwrites an entry and t.get() returns the underlying nested
 *   table (prototype-less objects keyed by f, then by e).
 */
function train(sentencePairs, iteration) {
  iteration = iteration || 100;

  var count,
      total,
      t = pairTable(),
      allForeignWords = createDict(),
      allEnglishWords = createDict(),
      foreignVocab,
      englishVocab,
      uniform,
      corpus;

  // Dictionaries are keyed by arbitrary corpus words, so they must not inherit
  // from Object.prototype: otherwise words such as "constructor" or
  // "__proto__" would read inherited members or write onto the prototype.
  function createDict() {
    return Object.create(null);
  }

  function tokenize(sentence) {
    return sentence.split(' ');
  }

  // Two-level store (f -> e -> number). Called with two arguments it reads
  // (0 when absent, without creating rows); with three it writes and returns
  // the written value.
  function pairTable() {
    var table = createDict();
    function func(f, e, val) {
      var row = table[f];
      if (typeof val === 'undefined') {
        return (row && row[e]) || 0;
      }
      if (!row) {
        row = table[f] = createDict();
      }
      row[e] = val;
      return val;
    }
    func.get = function() {
      return table;
    };
    return func;
  }

  // One-level store (f -> number) with the same read/write calling convention.
  function totalFunc() {
    var totals = createDict();
    return function(f, val) {
      if (typeof val === 'undefined') {
        return totals[f] || 0;
      }
      totals[f] = val;
      return val;
    };
  }

  // Tokenize every sentence of every pair.
  corpus = sentencePairs.map(function(pair) {
    return pair.map(function(sentence) {
      return tokenize(sentence);
    });
  });

  // Collect both vocabularies.
  corpus.forEach(function(pair) {
    pair[0].forEach(function(f) {
      allForeignWords[f] = true;
    });
    pair[1].forEach(function(e) {
      allEnglishWords[e] = true;
    });
  });
  // Computed once: the vocabularies do not change during training.
  foreignVocab = Object.keys(allForeignWords);
  englishVocab = Object.keys(allEnglishWords);

  /**
   * Learning lexical translation models by the EM algorithm.
   */
  // Initialize t(f, e) uniformly.
  uniform = 1 / englishVocab.length;
  foreignVocab.forEach(function(f) {
    englishVocab.forEach(function(e) {
      t(f, e, uniform);
    });
  });

  // Start iteration.
  for (var i = 0; i < iteration; i++) {
    // Expected counts are rebuilt from scratch on every iteration.
    count = pairTable();
    total = totalFunc();

    corpus.forEach(function(pair) {
      var fWords = pair[0],
          eWords = pair[1],
          sTotal = createDict();

      // Per-sentence normalizer for each English word.
      eWords.forEach(function(e) {
        sTotal[e] = fWords.reduce(function(res, f) {
          return res + t(f, e);
        }, 0);
      });

      // Accumulate fractional counts.
      eWords.forEach(function(e) {
        fWords.forEach(function(f) {
          var share = t(f, e) / sTotal[e];
          count(f, e, count(f, e) + share);
          total(f, total(f) + share);
        });
      });
    });

    // Update translation table.
    foreignVocab.forEach(function(f) {
      var denominator = total(f);
      englishVocab.forEach(function(e) {
        t(f, e, count(f, e) / denominator);
      });
    });
  }

  return t;
}
exports.train = train;
})('object' === typeof module ? module.exports : (this.ibmModel1 = {}));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment