Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
This is just a browser-friendly version of https://github.com/StephanGeorg/trigram-similarity
/**
 * Build an n-gram generator for a fixed window size.
 *
 * @param {number} n - Window size; must be a positive integer.
 * @returns {function} A function that slices its argument into consecutive
 *   windows of `n` items.
 * @throws {Error} When `n` is not a positive integer (non-number, NaN,
 *   infinite, fractional, or smaller than 1).
 */
const nGram = function (n) {
  // Number.isInteger rejects non-numbers, NaN and +/-Infinity in one check.
  // Fractional sizes must be refused as well: they produce a fractional
  // window count, which a count-down loop can never bring to exactly zero.
  if (!Number.isInteger(n) || n < 1) {
    throw new Error('`' + n + '` is not a valid argument for `n-gram`');
  }

  /**
   * Create n-grams from a given value.
   *
   * Values exposing a `slice` method (strings, arrays) are windowed directly;
   * anything else is converted with `String(value)` first.
   *
   * @template {string|string[]} T
   * @param {T} [value]
   * @returns {T[]} Every window of length `n`, in order; empty when the value
   *   is null/undefined or shorter than `n`.
   */
  function grams(value) {
    if (value === null || value === undefined) {
      return [];
    }

    const source = value.slice ? value : String(value);
    const windowCount = source.length - n + 1;

    if (windowCount < 1) {
      return [];
    }

    return Array.from({ length: windowCount }, (_, start) =>
      source.slice(start, start + n),
    );
  }

  return grams;
};
// Generator producing windows of two consecutive items.
const bigram = nGram(2);
// Generator producing windows of three consecutive items; used by generateTrigram.
const trigram = nGram(3);
/**
 * Normalize a string into the padded form that trigrams are extracted from.
 *
 * The input is trimmed and lower-cased, every run of whitespace between words
 * becomes exactly two spaces, and the result is prefixed with two spaces and
 * suffixed with one. Empty or whitespace-only input yields an empty string.
 *
 * @param {string} input
 * @returns {string}
 */
const convertString = (input = '') => {
  const trimmed = input.trim();
  if (!trimmed) return '';

  // Built with repeat() so the two-space padding stays unambiguous even if
  // this source is ever passed through something that collapses whitespace.
  const doubleSpace = ' '.repeat(2);

  // Two spaces in front, doubled gaps between words, one space at the end.
  return `${doubleSpace}${trimmed.replace(/\s+/g, doubleSpace)} `.toLowerCase();
};
/**
 * Produce the distinct trigrams of a string.
 *
 * The input is first normalized with convertString and split into trigrams.
 * Trigrams made of one letter, mark or digit followed by two whitespace
 * characters are dropped, and duplicates are removed via a Set (first
 * occurrence order is kept; no sorting is performed).
 *
 * @param {string} input
 * @returns {string[]}
 */
const generateTrigram = (input = '') => {
  const normalized = convertString(input);
  const kept = trigram(normalized).filter(
    // The literal stays inside the callback on purpose: it carries the `g`
    // flag, so a shared instance would keep `lastIndex` state between tests.
    (candidate) => !/^[\p{Letter}\p{Mark}0-9]\s\s$/giu.test(candidate),
  );
  return Array.from(new Set(kept));
};
/**
 * Calculate trigram similarity between 2 strings.
 *
 * The score is the number of trigrams of `input1` that also occur in
 * `input2`, divided by the number of distinct trigrams across both inputs.
 * When neither input yields any trigram the score is 0.
 *
 * @param {string} input1
 * @param {string} input2
 * @returns {number}
 */
const trigramSimilarity = (input1 = '', input2 = '') => {
  const trigrams1 = generateTrigram(input1);
  const trigrams2 = generateTrigram(input2);

  // Distinct trigrams over both inputs.
  const unionSize = new Set([...trigrams1, ...trigrams2]).size;
  if (unionSize === 0) return 0;

  // Set membership keeps the overlap count linear instead of rescanning
  // trigrams2 for every entry of trigrams1.
  const lookup = new Set(trigrams2);
  const sharedCount = trigrams1.filter((item) => lookup.has(item)).length;

  return sharedCount / unionSize;
};
// Demo: log the similarity score of two sample strings.
console.log(trigramSimilarity('Chateau blanc', 'chateau cheval blanc')); // 0.7368421052631579
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment