Instantly share code, notes, and snippets.

Embed
What would you like to do?
// Matches every run of characters that is NOT an ASCII letter or digit, a
// character in the range U+00C0-U+00FF, a comma, or a space.
// Declared explicitly: a bare assignment would create an implicit global and
// throws a ReferenceError under strict mode / ES modules.
var _nonWordRe = /[^a-zA-Z0-9\u00C0-\u00FF, ]+/g;
/**
 * Split a string into its overlapping character n-grams.
 *
 * The input is lower-cased, every run of characters matched by `_nonWordRe`
 * is removed, and the result is wrapped in a leading and trailing '-' so the
 * first and last characters also take part in edge grams.
 *
 * @param {string} value - Text to split; must provide `toLowerCase()`.
 * @param {number} [gramSize=2] - Length of each gram; any falsy value falls
 *     back to 2.
 * @returns {string[]} Every substring of length `gramSize` of the normalised
 *     text, in left-to-right order.
 */
var _iterateGrams = function(value, gramSize) {
    gramSize = gramSize || 2;
    var simplified = '-' + value.toLowerCase().replace(_nonWordRe, '') + '-',
        results = [],
        i;
    // Pad the normalised string (not the raw input, which is no longer read)
    // so that a gramSize larger than the text still yields one full-width gram.
    while (simplified.length < gramSize) {
        simplified += '-';
    }
    for (i = 0; i < simplified.length - gramSize + 1; ++i) {
        results.push(simplified.slice(i, i + gramSize));
    }
    return results;
};
/**
 * Count how often each n-gram occurs in a string.
 *
 * @param {string} value - Text to analyse; passed straight to `_iterateGrams`.
 * @param {number} [gramSize=2] - Length of each gram; any falsy value falls
 *     back to 2.
 * @returns {Object} Map whose keys are grams and whose values are the number
 *     of occurrences of that gram.
 */
var _gramCounter = function(value, gramSize) {
    gramSize = gramSize || 2;
    var result = {},
        grams = _iterateGrams(value, gramSize),
        hasOwn = Object.prototype.hasOwnProperty,
        gram,
        i;
    for (i = 0; i < grams.length; ++i) {
        gram = grams[i];
        // Own-property check: `in` would also match names inherited from
        // Object.prototype (e.g. the gram "constructor") and corrupt the count.
        if (hasOwn.call(result, gram)) {
            result[gram] += 1;
        } else {
            result[gram] = 1;
        }
    }
    return result;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment