Created
October 15, 2011 18:44
-
-
Save phillro/1289964 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Text-matching helpers: a weighted quality score, string normalisation and
 * Levenshtein distance (absolute and as a similarity ratio).
 *
 * Works both as `new textUtil()` and as a plain `textUtil()` call; either way
 * a fresh object carrying the helpers is returned.
 *
 * @returns {Object} object exposing `fieldRelevancyValues`, `qualityScore`,
 *     `stringNormalize`, `levPercent` and `lev`.
 */
var textUtil = function textUtil() {
    // When invoked without `new`, `this` is the global object (sloppy mode) or
    // undefined (strict mode). Build a dedicated instance instead of attaching
    // the helpers to whatever `this` happens to be.
    var self = this instanceof textUtil ? this : Object.create(textUtil.prototype);

    var hasOwn = Object.prototype.hasOwnProperty;

    // Feature probe, evaluated once per instance: very old IE cannot index
    // into a string with [], in which case lev() splits its inputs into arrays.
    var needsSplit;
    try {
        needsSplit = !('0')[0];
    } catch (e) {
        needsSplit = true;
    }

    /**
     * Per-field weights used by qualityScore().
     * `value` is the multiplier applied to the field's contribution; a `type`
     * of '-exp' means the input is first mapped through Math.exp(-input).
     */
    self.fieldRelevancyValues = {
        distance: {value: 0.10, type: '-exp'},
        nameLev: {value: 0.30},
        zip: {value: 0.40}
    };

    /**
     * Sums the weighted contributions of every field in `fieldValueMap` that
     * has an entry in `self.fieldRelevancyValues`. Unknown fields are ignored.
     *
     * @param {Object} fieldValueMap field name -> numeric value.
     * @returns {number} the accumulated score (0 when no field matches).
     */
    self.qualityScore = function (fieldValueMap) {
        var weights = self.fieldRelevancyValues;
        var score = 0;
        var field;
        var weight;

        for (field in fieldValueMap) {
            // Own-property check keeps names such as "constructor" or
            // "toString" from resolving to Object.prototype members, which
            // would turn the score into NaN.
            if (!hasOwn.call(weights, field)) {
                continue;
            }
            weight = weights[field];
            if (!weight) {
                continue;
            }
            if (weight.type === '-exp') {
                score += weight.value * Math.exp(-fieldValueMap[field]);
            } else {
                score += weight.value * fieldValueMap[field];
            }
        }
        return score;
    };

    /**
     * Normalises a string for comparison: every "&" becomes " and ", all
     * characters other than ASCII letters, digits and spaces are removed,
     * and the result is lower-cased.
     *
     * @param {string} str
     * @returns {string}
     */
    self.stringNormalize = function (str) {
        return str
            // Global flag: without it only the first "&" was expanded and the
            // remaining ones were dropped by the next replace.
            .replace(/&/g, ' and ')
            .replace(/[^A-Za-z0-9 ]+/g, '')
            .toLowerCase();
    };

    /**
     * Calculates Levenshtein similarity as a ratio:
     * 1 - distance / length of the longer input.
     *
     * @param {string} s1
     * @param {string} s2
     * @returns {number} 1 for identical inputs (including two empty strings),
     *     0 when the distance equals the longer length.
     */
    self.levPercent = function (s1, s2) {
        var longest = Math.max(s1.length, s2.length);
        if (longest === 0) {
            // Two empty strings are identical; avoid 0 / 0 -> NaN.
            return 1;
        }
        return 1 - self.lev(s1, s2) / longest;
    };

    /**
     * Calculates the Levenshtein distance between two strings.
     *
     * version: 1109.2015
     * discuss at: http://phpjs.org/functions/levenshtein
     * +   original by: Carlos R. L. Rodrigues (http://www.jsfromhell.com)
     * +   bugfixed by: Onno Marsman
     * +    revised by: Andrea Giammarchi (http://webreflection.blogspot.com)
     * + reimplemented by: Brett Zamir (http://brett-zamir.me)
     * + reimplemented by: Alexander M Beedie
     * *     example 1: lev('Kevin van Zonneveld', 'Kevin van Sommeveld');
     * *     returns 1: 3
     *
     * @param {string} s1
     * @param {string} s2
     * @returns {number} number of single-character edits separating s1 and s2.
     */
    self.lev = function (s1, s2) {
        if (s1 === s2) {
            return 0;
        }

        var len1 = s1.length;
        var len2 = s2.length;
        if (len1 === 0) {
            return len2;
        }
        if (len2 === 0) {
            return len1;
        }

        if (needsSplit) {
            s1 = s1.split('');
            s2 = s2.split('');
        }

        // Only two rows of the edit-distance table are kept at a time.
        var prev = new Array(len1 + 1);
        var curr = new Array(len1 + 1);
        var i;
        var j;
        var cost;
        var ch2;
        var swap;

        // Row 0: distance from the empty prefix of s2 to each prefix of s1.
        for (i = 0; i <= len1; i++) {
            prev[i] = i;
        }

        for (j = 1; j <= len2; j++) {
            curr[0] = j;
            ch2 = s2[j - 1];
            for (i = 0; i < len1; i++) {
                cost = (s1[i] === ch2) ? 0 : 1;
                curr[i + 1] = Math.min(
                    prev[i + 1] + 1, // skip a character of s2
                    curr[i] + 1,     // skip a character of s1
                    prev[i] + cost   // match or substitute
                );
            }
            // The row just filled becomes the previous row of the next pass.
            swap = prev;
            prev = curr;
            curr = swap;
        }
        return prev[len1];
    };

    return self;
};

// Export for CommonJS consumers; the guard lets the same file load in
// environments that do not define `module`.
if (typeof module !== 'undefined' && module.exports) {
    module.exports = textUtil;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment