Skip to content

Instantly share code, notes, and snippets.

@phillro
Created October 15, 2011 18:44
Show Gist options
  • Save phillro/1289964 to your computer and use it in GitHub Desktop.
Save phillro/1289964 to your computer and use it in GitHub Desktop.
module.exports = textUtil = function() {
var self = this;
self.fieldRelevancyValues = {
distance: {value:.10, type:'-exp'},
nameLev : {value:.30},
zip: {value:.40}
}
self.qualityScore = function (fieldValueMap){
var score=0
for(field in fieldValueMap){
if(self.fieldRelevancyValues[field]){
if(self.fieldRelevancyValues[field].type=='-exp'){
var raw =Math.exp(-fieldValueMap[field])
var val =self.fieldRelevancyValues[field].value*raw
console.log(val)
score+=val
}else{
score+=self.fieldRelevancyValues[field].value*fieldValueMap[field]
}
}
}
return score
}
self.stringNormalize = function(str) {
str = str.replace(/\&/, ' and ')
str = str.replace(/[^A-Za-z0-9 ]+/g, '').toLowerCase()
return str
}
/**
* calculates levenstein as a percentage match
* @param s1
* @param s2
*/
self.levPercent = function(s1, s2) {
var levValue = self.lev(s1, s2)
var big = s1.length >= s2.length ? s1 : s2
return 1 - levValue / big.length
}
self.lev = function(s1, s2) {
// Calculate Levenshtein distance between two strings
//
// version: 1109.2015
// discuss at: http://phpjs.org/functions/levenshtein
// + original by: Carlos R. L. Rodrigues (http://www.jsfromhell.com)
// + bugfixed by: Onno Marsman
// + revised by: Andrea Giammarchi (http://webreflection.blogspot.com)
// + reimplemented by: Brett Zamir (http://brett-zamir.me)
// + reimplemented by: Alexander M Beedie
// * example 1: levenshtein('Kevin van Zonneveld', 'Kevin van Sommeveld');
// * returns 1: 3
if (s1 == s2) {
return 0;
}
var s1_len = s1.length;
var s2_len = s2.length;
if (s1_len === 0) {
return s2_len;
}
if (s2_len === 0) {
return s1_len;
}
// BEGIN STATIC
var split = false;
try {
split = !('0')[0];
} catch (e) {
split = true; // Earlier IE may not support access by string index
}
// END STATIC
if (split) {
s1 = s1.split('');
s2 = s2.split('');
}
var v0 = new Array(s1_len + 1);
var v1 = new Array(s1_len + 1);
var s1_idx = 0,
s2_idx = 0,
cost = 0;
for (s1_idx = 0; s1_idx < s1_len + 1; s1_idx++) {
v0[s1_idx] = s1_idx;
}
var char_s1 = '',
char_s2 = '';
for (s2_idx = 1; s2_idx <= s2_len; s2_idx++) {
v1[0] = s2_idx;
char_s2 = s2[s2_idx - 1];
for (s1_idx = 0; s1_idx < s1_len; s1_idx++) {
char_s1 = s1[s1_idx];
cost = (char_s1 == char_s2) ? 0 : 1;
var m_min = v0[s1_idx + 1] + 1;
var b = v1[s1_idx] + 1;
var c = v0[s1_idx] + cost;
if (b < m_min) {
m_min = b;
}
if (c < m_min) {
m_min = c;
}
v1[s1_idx + 1] = m_min;
}
var v_tmp = v0;
v0 = v1;
v1 = v_tmp;
}
return v0[s1_len];
}
return self;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment