Skip to content

Instantly share code, notes, and snippets.

@darcy
Created June 8, 2012 14:51
Show Gist options
  • Save darcy/2896009 to your computer and use it in GitHub Desktop.
Save darcy/2896009 to your computer and use it in GitHub Desktop.
MongoDB Trigram Search
// THIS SHOULD WORK AS COPY/PASTE into mongo console
var games = ["here is a test", "small test", "another game"];
var query = 'here is testing a test';
//////////////////////////////////////////////
// 1. util functions
var intersect = function(arr1, arr2) {
var r = [], o = {}, l = arr2.length, i, v;
for (i = 0; i < l; i++) {
o[arr2[i]] = true;
}
l = arr1.length;
for (i = 0; i < l; i++) {
v = arr1[i];
if (v in o) {
r.push(v);
}
}
return r;
};
var trigramize = function(s) {
//normalize string
var w = ' '+s.toLowerCase().trim();
//trigramize it
var tg = [];
for (var i=0; i<Math.min(s.length,20); i++) {
tg.push(w.substring(i,i+3));
}
// get the unique trigrams
// (http://www.shamasis.net/2009/09/fast-algorithm-to-find-unique-items-in-javascript-array/)
var o = {}, i, l = tg.length, r = [];
for(i=0; i<l;i+=1) o[tg[i]] = tg[i];
for(i in o) r.push(o[i]);
return r;
}
// 2. create games
db.foo.drop();
games.forEach(function(g) {
db.foo.save({name : g, trigrams : trigramize(g) });
});
// 3. search function
var search = function(s) {
var map = function() {
emit(this, { trigram_match : intersect(this.trigrams, trigrams).length });
};
var reduce=function(key, values) {
return values;
}
var trigram_query = trigramize(s);
var res_name = ObjectId().toString();
var res = db.foo.mapReduce(map,reduce,{
out: res_name, // we could make this inline so our results aren't global, however then you don't have collection functions like sort or limit, etc
query: {trigrams : {$in :trigram_query}}, // filter out only trigram 'hits'
scope : {
trigrams : trigram_query, // pass our query param to map/reduce
intersect : intersect // pass the intersect function helper
}
});
// res.find();
var results = db[res_name].find().sort({value: -1}).limit(20).toArray(); //map(function(t) { return t._id});
db[res_name].drop();
return results;
}
search(query);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment