Skip to content

Instantly share code, notes, and snippets.

@bencmbrook
Last active July 19, 2022 16:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bencmbrook/34ff7d264d8a5ef3b6da0c84241d80f0 to your computer and use it in GitHub Desktop.
Save bencmbrook/34ff7d264d8a5ef3b6da0c84241d80f0 to your computer and use it in GitHub Desktop.
Fuzzy lookup Google Sheets (extended from https://github.com/nyanlynntherazi/ZLOOKUP)
const exclusions = [
' Inc.',
', Inc.',
'The ',
'.com',
];
/**
* Exclude words from matching score. See exclusions above.
*/
function excludeWords(input) {
let output = input;
exclusions.forEach((exclusion) => {
const regex = new RegExp(exclusion, 'gi');
output = output.replace(regex, '');
});
return output;
}
/**
* Fuzzy String Matching
*
* @param {value,column,index,threshold} input The value to fuzzy match
* @return The index
* @customfunction
*/
function ZLOOKUP(value, column, index, threshold) {
const formattedValue = excludeWords(value);
let accArr = []
for(i=0;i<column.length;++i){
const formattedColumn = column[i][0]
accArr.push([column[i][0],LEVENSHTEIN(formattedColumn,formattedValue)])
}
let scoreArr = accArr.map( elem => elem[1])
if (Math.max(...scoreArr)>threshold){
let calculatedIndex = scoreArr.indexOf(Math.max(...scoreArr))
return column[calculatedIndex][index-1]
} else {
throw "Error: No match in column above threshold";
}
}
/**
* Levenshtein Distance Implementation
* from https://gist.github.com/andrei-m/982927/0efdf215b00e5d34c90fdc354639f87ddc3bd0a5
*
* @param {a,b} input The strings to compare
* @return The Levenshtein Distance
* @customfunction
*/
//
function LEVENSHTEIN(a, b){
a = String(a)
b = String(b)
if(a.length == 0) return b.length;
if(b.length == 0) return a.length;
var matrix = [];
// increment along the first column of each row
var i;
for(i = 0; i <= b.length; i++){
matrix[i] = [i];
}
// increment each column in the first row
var j;
for(j = 0; j <= a.length; j++){
matrix[0][j] = j;
}
// Fill in the rest of the matrix
for(i = 1; i <= b.length; i++){
for(j = 1; j <= a.length; j++){
if(b.charAt(i-1) == a.charAt(j-1)){
matrix[i][j] = matrix[i-1][j-1];
} else {
matrix[i][j] = Math.min(matrix[i-1][j-1] + 1, // substitution
Math.min(matrix[i][j-1] + 1, // insertion
matrix[i-1][j] + 1)); // deletion
}
}
}
return (1-matrix[b.length][a.length]/Math.max(a.length,b.length))*100;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment