Created
March 31, 2014 18:39
-
-
Save kkoch986/9899177 to your computer and use it in GitHub Desktop.
NaturalNode Wordnet vs Trie `isWord` lookups
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// NOTE: These are not rock-solid benchmarks, just a quick illustrative test.

// `natural` provides both structures under comparison (WordNet and Trie).
// It must be loaded before either constructor below is used.
var natural = require("natural");
var wordnet = new natural.WordNet();
var fs = require("fs");

// Number of full passes made over the word list by each test.
var loops = 100;
// Shared accumulator of dictionary words, appended to by the test callbacks.
var allWords = [];

// NOTE: This file should exist on most UNIX varieties
var dictionary = "/usr/share/dict/words";

// NOTE(review): the `false` argument presumably makes the trie
// case-insensitive -- confirm against the natural Trie documentation.
var trie = new natural.Trie(false);
// TRIE TEST
// Reads the dictionary file, inserts every word into `trie`, then times
// `loops` full passes of `trie.contains` over that same word list.
// NOTE: the "Build Trie" timer starts before the file is read, so the reported
// time covers the file read as well as the insertions.
console.time("Build Trie");
fs.readFile(dictionary, {"encoding":"ascii"}, function (err, data) {
  if (err) throw err;

  // Drop empty entries such as the one produced by the file's trailing
  // newline, so the counts below reflect real words only.
  var words = data.split("\n").filter(function (word) {
    return word.length > 0;
  });

  // A global regex is required here: a plain string pattern would remove
  // only the first newline and inflate the character count.
  var size = data.replace(/\n/g, "").trim().length;
  console.log(words.length + " Words ("+size+" characters) Added.");

  trie.addStrings(words);

  // This test works from its own `words` list rather than the shared
  // `allWords` array, which the WordNet test also appends to; whichever
  // callback ran second would otherwise see every word twice.
  console.log("Number of words: ", words.length);
  console.log("Number of passes: ", loops);
  console.log("Total Lookups: ", words.length * loops);
  console.timeEnd("Build Trie");
  console.log(trie.getSize());

  // Do the lookups
  console.time("lookups");
  var c = 0;
  for (var i = 0; i < loops; i++) {
    // Index loop instead of for...in, which enumerates property keys as
    // strings and is not meant for arrays.
    for (var w = 0; w < words.length; w++) {
      trie.contains(words[w]);
      c++;
    }
    console.log("finished loop: ", i);
  }
  console.timeEnd("lookups");
  console.log(c + " total lookups");
});
// WORDNET Test
// Reads the same dictionary file (path held in the top-level `dictionary`
// variable) and times `loops` passes of asynchronous `wordnet.lookup` calls,
// issued strictly one after another.
fs.readFile(dictionary, {"encoding":"ascii"}, function (err, data) {
  if (err) throw err;

  // Use a local list, without empty entries, rather than appending to the
  // shared `allWords` array: the Trie test appends the same words there, which
  // would double the number of lookups performed here.
  var words = data.split("\n").filter(function (word) {
    return word.length > 0;
  });

  /**
   * Performs `i` sequential WordNet lookups, counting down to zero.
   *
   * @param {number} i - Number of lookups still to perform.
   * @param {function} done - Called once, after the final lookup completes.
   */
  function lookup(i, done) {
    if (i === 0) {
      // All lookups are finished; notify the caller so it can stop the timer.
      done();
      return;
    }

    if (i % 1000 === 0) {
      console.log("I:", i);
    }

    wordnet.lookup(words[i % words.length], function () {
      // Defer the next step through the timer queue so the recursion never
      // deepens the call stack, even if a lookup calls back synchronously.
      setTimeout(function () { lookup(i - 1, done); }, 0);
    });
  }

  console.time("wordnet lookup");
  lookup(loops * words.length, function () {
    console.timeEnd("wordnet lookup");
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment