Last active
July 19, 2022 16:24
-
-
Save bencmbrook/34ff7d264d8a5ef3b6da0c84241d80f0 to your computer and use it in GitHub Desktop.
Fuzzy lookup Google Sheets (extended from https://github.com/nyanlynntherazi/ZLOOKUP)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const exclusions = [ | |
' Inc.', | |
', Inc.', | |
'The ', | |
'.com', | |
]; | |
/** | |
* Exclude words from matching score. See exclusions above. | |
*/ | |
function excludeWords(input) { | |
let output = input; | |
exclusions.forEach((exclusion) => { | |
const regex = new RegExp(exclusion, 'gi'); | |
output = output.replace(regex, ''); | |
}); | |
return output; | |
} | |
/** | |
* Fuzzy String Matching | |
* | |
* @param {value,column,index,threshold} input The value to fuzzy match | |
* @return The index | |
* @customfunction | |
*/ | |
function ZLOOKUP(value, column, index, threshold) { | |
const formattedValue = excludeWords(value); | |
let accArr = [] | |
for(i=0;i<column.length;++i){ | |
const formattedColumn = column[i][0] | |
accArr.push([column[i][0],LEVENSHTEIN(formattedColumn,formattedValue)]) | |
} | |
let scoreArr = accArr.map( elem => elem[1]) | |
if (Math.max(...scoreArr)>threshold){ | |
let calculatedIndex = scoreArr.indexOf(Math.max(...scoreArr)) | |
return column[calculatedIndex][index-1] | |
} else { | |
throw "Error: No match in column above threshold"; | |
} | |
} | |
/** | |
* Levenshtein Distance Implementation | |
* from https://gist.github.com/andrei-m/982927/0efdf215b00e5d34c90fdc354639f87ddc3bd0a5 | |
* | |
* @param {a,b} input The strings to compare | |
* @return The Levenshtein Distance | |
* @customfunction | |
*/ | |
// | |
function LEVENSHTEIN(a, b){ | |
a = String(a) | |
b = String(b) | |
if(a.length == 0) return b.length; | |
if(b.length == 0) return a.length; | |
var matrix = []; | |
// increment along the first column of each row | |
var i; | |
for(i = 0; i <= b.length; i++){ | |
matrix[i] = [i]; | |
} | |
// increment each column in the first row | |
var j; | |
for(j = 0; j <= a.length; j++){ | |
matrix[0][j] = j; | |
} | |
// Fill in the rest of the matrix | |
for(i = 1; i <= b.length; i++){ | |
for(j = 1; j <= a.length; j++){ | |
if(b.charAt(i-1) == a.charAt(j-1)){ | |
matrix[i][j] = matrix[i-1][j-1]; | |
} else { | |
matrix[i][j] = Math.min(matrix[i-1][j-1] + 1, // substitution | |
Math.min(matrix[i][j-1] + 1, // insertion | |
matrix[i-1][j] + 1)); // deletion | |
} | |
} | |
} | |
return (1-matrix[b.length][a.length]/Math.max(a.length,b.length))*100; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment