Last active
October 20, 2021 22:17
-
-
Save trevorhreed/dc9d335800ccf4c2eb10df79f18f2463 to your computer and use it in GitHub Desktop.
A simple full-text-search algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Draft 2 | |
ngm.service('searchArray', function(){ | |
let THRESHHOLD = 0; | |
let FULL_EXACTMATCH_MODIFIER = 1.0; | |
let FULL_STARTSWITH_MODIFIER = 0.9; | |
let FULL_CONTAINS_MODIFIER = 0.3; | |
let FULL_PARTIAL_MODIFIER = 0.3; | |
let PRECISION = 2; | |
let re = /[\s\b]+/g; | |
let search = (term, arr, key) => { | |
let items = []; | |
if(!term) items = arr.map((item)=>{ | |
delete item.$rel; | |
return item; | |
}); | |
else{ | |
let lowerTerm = term.toLowerCase(); | |
let terms = lowerTerm.split(re).filter(Boolean); | |
arr.forEach((value)=>{ | |
let lowerValue = value[key].toLowerCase(); | |
let tokens = lowerValue.toLowerCase().split(re).filter(Boolean); | |
let exactMatchScore = FULL_EXACTMATCH_MODIFIER * (lowerValue === lowerTerm ? 1 : 0); | |
let startsWithScore = FULL_STARTSWITH_MODIFIER * (lowerValue !== lowerTerm && lowerValue.startsWith(lowerTerm) ? 1 : 0) * (lowerTerm.length / lowerValue.length); | |
let containsScore = FULL_CONTAINS_MODIFIER * (lowerTerm.length / lowerValue.length) * (lowerValue.indexOf(lowerTerm) > 0 ? 1 : 0); | |
let partialMatchScore = FULL_PARTIAL_MODIFIER * terms.map((term, i)=>{ | |
let exactMatchCount = tokens.filter(token => term === token).length ? 1 : 0; | |
let tokenStartsWithLength = tokens.reduce((acc, token)=>{ | |
if(!token.startsWith(term)) return acc; | |
return Math.max(token.length, acc); | |
}, 0); | |
let startsWithCount = tokenStartsWithLength ? (term.length / tokenStartsWithLength) : 0; | |
let tokenContainsLength = tokens.reduce((acc, token)=>{ | |
if(token.indexOf(term) < 1) return acc; | |
return Math.max(token.length, acc); | |
}, 0); | |
let containsCount = tokenContainsLength ? (term.length / tokenContainsLength) : 0; | |
return Math.max(exactMatchCount, startsWithCount, containsCount); | |
}).reduce((x, y) => x + y) / (tokens.length + terms.length); | |
value.$rel = { | |
exactMatchScore, | |
startsWithScore, | |
containsScore, | |
partialMatchScore, | |
toString(){ | |
return `r${this.valueOf().toFixed(PRECISION)} - e${this.exactMatchScore.toFixed(PRECISION)} - s${this.startsWithScore.toFixed(PRECISION)} - c${this.containsScore.toFixed(PRECISION)} - p${this.partialMatchScore.toFixed(PRECISION)}`; | |
}, | |
valueOf(){ | |
return Math.max( | |
this.exactMatchScore, | |
this.startsWithScore, | |
((this.containsScore + this.partialMatchScore) / 2) | |
); | |
} | |
}; | |
if(value.$rel > THRESHHOLD) items.push(value); | |
}); | |
} | |
return items.sort((a, b)=>{ | |
if(a.$rel > b.$rel) return -1; | |
if(a.$rel < b.$rel) return 1; | |
if(a[key] < b[key]) return -1; | |
if(a[key] > b[key]) return 1; | |
return 0; | |
}); | |
} | |
return search; | |
}); | |
// Draft 1 | |
ngm.service('searchArray', function(){ | |
let re = /[\s\b]+/g; | |
let threshhold = 0; | |
let search = (term, arr, key) => { | |
if(!term) return arr; | |
let items = []; | |
let lowerTerm = term.toLowerCase(); | |
let terms = lowerTerm.split(re).filter(Boolean); | |
arr.forEach((value)=>{ | |
let lowerValue = value[key].toLowerCase(); | |
let tokens = lowerValue.toLowerCase().split(re).filter(Boolean); | |
let partialMatchScore = tokens.map((token, i)=>{ | |
let exactMatchCount = 1.0 * (terms.filter(y => token === y).length / tokens.length); | |
let startsWithCount = 0.5 * (terms.filter(y => token.startsWith(y)).length / tokens.length); | |
let containsCount = 0.1 * (terms.filter(y => token.indexOf(y) > 0 && y.length > 1).length / tokens.length); | |
return (exactMatchCount + startsWithCount + containsCount) * ( 1 + (1/(i + 100)) ); | |
}).reduce((x, y) => x + y); | |
let exactMatchScore = 1.0 * (lowerValue === lowerTerm ? 1 : 0); | |
let startsWithScore = 0.1 * (lowerValue.startsWith(lowerTerm) ? 1 : 0); | |
let rel = ((partialMatchScore + startsWithScore + exactMatchScore) / 2.6).toFixed(5); | |
if(rel > threshhold) items.push({ rel, value, scores: { | |
partialMatchScore, | |
startsWithScore, | |
exactMatchScore | |
} }); | |
}); | |
return items.sort((a, b)=>{ | |
if(a.rel > b.rel) return -1; | |
if(a.rel < b.rel) return 1; | |
return 0; | |
}).map(x => x.value); | |
} | |
return search; | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment