Skip to content

Instantly share code, notes, and snippets.

@robertknight
Created April 18, 2013 05:46
Show Gist options
  • Save robertknight/5410420 to your computer and use it in GitHub Desktop.
Save robertknight/5410420 to your computer and use it in GitHub Desktop.
function termFreqMap(str) {
var words = str.split(' ');
var termFreq = {};
words.forEach(function(w) {
termFreq[w] = (termFreq[w] || 0) + 1;
});
return termFreq;
}
function addKeysToDict(map, dict) {
for (var key in map) {
dict[key] = true;
}
}
function termFreqMapToVector(map, dict) {
var termFreqVector = [];
for (var term in dict) {
termFreqVector.push(map[term] || 0);
}
return termFreqVector;
}
function vecDotProduct(vecA, vecB) {
var product = 0;
for (var i = 0; i < vecA.length; i++) {
product += vecA[i] * vecB[i];
}
return product;
}
function vecMagnitude(vec) {
var sum = 0;
for (var i = 0; i < vec.length; i++) {
sum += vec[i] * vec[i];
}
return Math.sqrt(sum);
}
function cosineSimilarity(vecA, vecB) {
return vecDotProduct(vecA, vecB) / (vecMagnitude(vecA) * vecMagnitude(vecB));
}
function textCosineSimilarity(strA, strB) {
var termFreqA = termFreqMap(strA);
var termFreqB = termFreqMap(strB);
var dict = {};
addKeysToDict(termFreqA, dict);
addKeysToDict(termFreqB, dict);
var termFreqVecA = termFreqMapToVector(termFreqA, dict);
var termFreqVecB = termFreqMapToVector(termFreqB, dict);
return cosineSimilarity(termFreqVecA, termFreqVecB);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment