Skip to content

Instantly share code, notes, and snippets.

@alphatr
Created April 7, 2016 03:36
Show Gist options
  • Save alphatr/2cb7476659d388f91c2bf5efe182bc31 to your computer and use it in GitHub Desktop.
Save alphatr/2cb7476659d388f91c2bf5efe182bc31 to your computer and use it in GitHub Desktop.
比较两个文本的相似度
/**
 * Computes the cosine similarity of two texts.
 *
 * Each text is split into tokens (runs of digits, runs of Latin letters, and
 * individual CJK ideographs). Every distinct token becomes one dimension of a
 * binary presence vector, and the cosine of the angle between the two vectors
 * is returned.
 *
 * @param {string} text1 - First text.
 * @param {string} text2 - Second text.
 * @returns {number} Similarity in the range [0, 1]; 0 when either text
 *     contains no usable tokens.
 */
var compare = (function () {
    "use strict";

    // Character classes, checked in this order; the first match wins
    // (so '-' is classified as 'number', not 'alphabet').
    const TYPE_PATTERNS = [
        ['number', /[-0-9\.]/],
        ['alphabet', /[-'A-Z]/i],
        ['han', /[\u4E00-\u9FA5]/]
    ];

    // Tokens ignored entirely: single digits and a few common Chinese particles.
    const BLOCK_LIST = /^[0-9的呢了]$/;

    /**
     * Returns the character class name of `char`, or "" when it belongs to
     * none of the known classes (whitespace, punctuation, ...).
     */
    const classify = function (char) {
        for (const [type, pattern] of TYPE_PATTERNS) {
            if (pattern.test(char)) {
                return type;
            }
        }
        return "";
    };

    /**
     * Tokenizes `text` and returns a dictionary of token -> occurrence count.
     * Tokens are lower-cased. The dictionary has no prototype, so tokens such
     * as "constructor" or "toString" cannot collide with inherited members.
     */
    const splitText = function (text) {
        const counts = Object.create(null);
        // A trailing separator guarantees the last token is flushed.
        const source = text + " ";
        let word = "";
        let prevType = "";

        for (let i = 0; i < source.length; i++) {
            const char = source.charAt(i).toLowerCase();
            const nowType = classify(char);

            // A token ends when the character class changes; every han
            // character is a token on its own.
            if (prevType !== nowType || nowType === 'han') {
                /* start of a new token: flush the previous one */
                if (word && prevType && !BLOCK_LIST.test(word)) {
                    counts[word] = (counts[word] || 0) + 1;
                }
                word = char;
                prevType = nowType;
            } else {
                word += char;
            }
        }
        return counts;
    };

    /**
     * Merges several token dictionaries into token -> vector, where
     * vector[i] is 1 if the token occurs in mapList[i] and 0 otherwise.
     *
     * NOTE(review): only presence is recorded; the per-token counts produced
     * by splitText are not used as weights. Confirm whether term-frequency
     * weighting was intended.
     */
    const beyondMap = function (mapList) {
        const vectors = Object.create(null);
        mapList.forEach(function (wordMap, index) {
            for (const token in wordMap) {
                if (!vectors[token]) {
                    vectors[token] = new Array(mapList.length).fill(0);
                }
                vectors[token][index]++;
            }
        });
        return vectors;
    };

    /**
     * Cosine of the angle between the two vectors formed by component 0 and
     * component 1 of every entry in `map`. Returns 0 when either vector is
     * all zeros instead of dividing by zero.
     */
    const cos = function (map) {
        let deno1 = 0;
        let deno2 = 0;
        let nume = 0;
        for (const token in map) {
            const sub1 = map[token][0];
            const sub2 = map[token][1];
            deno1 += sub1 * sub1;
            deno2 += sub2 * sub2;
            nume += sub1 * sub2;
        }
        // One square root over the product keeps identical texts at exactly 1.
        const denominator = Math.sqrt(deno1 * deno2);
        return denominator === 0 ? 0 : nume / denominator;
    };

    /** Entry point: similarity of `text1` and `text2`. */
    const beyond = function (text1, text2) {
        return cos(beyondMap([
            splitText(text1),
            splitText(text2)
        ]));
    };

    // Expose the comparison function; without this `compare` is undefined.
    return beyond;
}());
// Example call: compare two texts
compare("text1", 'text2');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment