Skip to content

Instantly share code, notes, and snippets.

@jonarddoci
Created June 17, 2019 06:04
Show Gist options
  • Save jonarddoci/7193c20e0da9965b6fdcbf0a80f7fcb4 to your computer and use it in GitHub Desktop.
Save jonarddoci/7193c20e0da9965b6fdcbf0a80f7fcb4 to your computer and use it in GitHub Desktop.
scrappy trie, nodes with data, multiple key lookup implementation (duplicates not handled yet)
<!-- does not handle duplicates well, need to use hashing to fix that -->
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>test</title>
<base href="/">
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<script>
var dataSource = [
{
type: "residential",
locationName: "home",
postalCode: "12340"
},
{
type: "office",
locationName: "my hq",
postalCode: "90001"
},
{
type: "office",
locationName: "my office1",
postalCode: "10002"
},
{
type: "office",
locationName: "my office2",
postalCode: "10003"
}
];
var Trie = {
root: null
};
var locationKeyMakers = [
function(data) {
return data.locationName.split(/\W+/).filter(function(value) {return !!value;});
},
function(data) {
return [data.locationName];
},
function(data) {
return [data.postalCode];
}
];
function addData(trie, data, keyMakerList) {
var keys = {};
keyMakerList.map(function(keyMaker) {return keyMaker(data)} ).reduce(function(o1, o2) {return o1.concat(o2)})
.forEach(function(key) {keys[key] = key});
for (var key in keys) {
if (keys.hasOwnProperty(key)) {
insertTrieNode(trie, data, key);
}
}
}
function insertTrieNode(trie, data, key) {
if(!key || !data || !trie) {
return;
}
if (!trie.root) {
trie.root = {
key: '',
children: {}
};
}
var iterator = trie.root;
var parent = trie.root;
for (var i = 0; i < key.length; i++) {
var charAt = key[i];
if (charAt in iterator.children) {
if (i === key.length - 1) {
iterator.children[charAt].data = iterator.children[charAt].data.concat(data)
} else {
iterator = iterator.children[charAt];
parent = iterator;
}
} else {
if (i === (key.length - 1)) {
parent.children[charAt] = {
key: charAt,
data: [data],
children: {}
}
} else {
parent.children[charAt] = {
key: charAt,
children: {}
};
iterator = parent.children[charAt];
parent = iterator;
}
}
}
}
function gatherChildren(trieNode) {
if (!trieNode) {
return [];
}
var iterator = trieNode;
if (!iterator.children) {
if (iterator.data) {
return iterator.data;
} else {
return [];
}
}
results = [];
if (iterator.data) {
results = results.concat(iterator.data);
}
for (var childKey in iterator.children) {
if (iterator.children.hasOwnProperty(childKey)) {
results = results.concat(gatherChildren(iterator.children[childKey]));
}
}
return results;
}
function prefixSearch(trie, prefix) {
if (!trie || !trie.root || !prefix) {
return [];
}
var iterator = trie.root;
for (var i = 0; i < prefix.length; i++) {
var charAt = prefix[i];
if (!iterator.children[charAt]) {
return [];
}
if (i === (prefix.length-1)) {
return gatherChildren(iterator.children[charAt])
} else {
iterator = iterator.children[charAt];
}
}
}
addData(Trie, dataSource[0], locationKeyMakers);
addData(Trie, dataSource[1], locationKeyMakers);
addData(Trie, dataSource[2], locationKeyMakers);
console.log(Trie);
// should return all offices, since they start with my
console.log(prefixSearch(Trie, 'my'));
// should return hq
console.log(prefixSearch(Trie, '9'));
// should return home and hq
console.log(prefixSearch(Trie, 'h'));
console.log(prefixSearch(Trie, 'my office1'));
function benchmark() {
var searchData = buildBenchmarkDataSource();
var prefixSearches = buildBenchmarkSearchStrings();
// build trie
var startDate = new Date().getTime();
var benchMarkTrie = {};
searchData.forEach(function (value) { addData(benchMarkTrie, value, [function(data) {return [data.locationName]}])});
var endDate = new Date().getTime();
console.log(" trie build: ", endDate - startDate);
// end build trie
////////////////
//////////////// ARRAY SEARCH
////////////////
startDate = new Date().getTime();
var arraySearchResults = {};
for (var i = 0; i < prefixSearches.length; i++) {
arraySearchResults[i] = arraySearch(searchData, prefixSearches[i]);
}
endDate = new Date().getTime();
console.log(" array search: ", endDate - startDate);
////////////////
//////////////// ARRAY END
////////////////
////////////////
//////////////// TRIE SEARCH
////////////////
startDate = new Date().getTime();
var trieSearchResults = {};
for (var j = 0; j < prefixSearches.length; j++) {
trieSearchResults[j] = prefixSearch(benchMarkTrie, prefixSearches[j]);
}
endDate = new Date().getTime();
console.log(" TRIE search: ", endDate - startDate);
////////////////
//////////////// TRIE END
////////////////
for (var k = 0; k < prefixSearches.length; k++) {
if (arraySearchResults[k].length !== trieSearchResults[k].length) {
console.log(k, ' results do not match query:', prefixSearches[k], 'array results ', arraySearchResults[k], 'trie results', trieSearchResults[k])
}
}
}
function buildBenchmarkSearchStrings() {
var data = [];
for (var i = 1; i < 1000; i++) {
data.push(getRandomString(2));
}
return data;
}
function buildBenchmarkDataSource() {
var data = [];
for (var i = 0; i < 100000; i++) {
data.push({
locationName: getRandomString(),
postalCode: getRandomString(),
type: getRandomString()
})
}
return data;
}
function arraySearch(array, prefix) {
var results = [];
for (var i = 0; i < array.length; i++) {
var el = array[i];
if (el.locationName.startsWith(prefix)) {
results.push(el);
}
}
return results;
}
function getRandomString(length) {
length = length || 20;
return (Math.random().toString(36) + Math.random().toString(36) + Math.random().toString(36) + Math.random().toString(36)).substring(2, length + 2);
}
// benchmark();
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment