Skip to content

Instantly share code, notes, and snippets.

@iwarshak
Created March 22, 2010 19:24
Show Gist options
  • Save iwarshak/340433 to your computer and use it in GitHub Desktop.
Save iwarshak/340433 to your computer and use it in GitHub Desktop.
gists for mongo mapreduce post
# Book document whose facet fields are each stored as an indexed array.
class Book
  include MongoMapper::Document

  # Names of the array-valued keys that are treated as facets.
  CONTEXTS = ['authors', 'rating', 'keywords', 'genre'].freeze

  # Declare one indexed Array key per facet name.
  CONTEXTS.each do |context|
    key context, Array, :index => true
  end

  key :title, String
  key :contexts, Array

  before_create :set_contexts

  # Stores the list of facet names on the document itself. Registered above
  # as a before_create callback.
  def set_contexts
    # Hand out a copy so that changing one document's list can never alter
    # the shared CONTEXTS constant.
    self.contexts = CONTEXTS.dup
  end
end
require 'active_support'
require 'benchmark'
# Alphabet the random words are drawn from: 'a'..'z' followed by 'A'..'Z'.
O = [('a'..'z'), ('A'..'Z')].map { |range| range.to_a }.flatten.freeze

# Builds an array of random alphabetic words.
#
# number - how many words to generate (default 5).
# length - number of characters in each word (default 5).
#
# Returns an Array of `number` Strings, each exactly `length` characters long.
def generate_word_array(number = 5, length = 5)
  Array.new(number) do
    # Array.new(length) yields exactly `length` characters; an inclusive
    # 0..length range would yield one more than requested.
    Array.new(length) { O[rand(O.length)] }.join
  end
end
# Pools of random sample values used when populating books. Every entry is
# itself a one-element array, because generate_word_array returns an array.
AUTHORS  = Array.new(20_000)  { generate_word_array(1) }
KEYWORDS = Array.new(1_000)   { generate_word_array(1) }
GENRE    = Array.new(1_000)   { generate_word_array(1) }
TITLES   = Array.new(100_000) { generate_word_array(1, 10) }
# With 100k records
# Empty the collection, then insert 100,000 books built from the random pools.
Book.collection.remove
100_000.times do
  attributes = {
    :title    => TITLES.rand,
    :authors  => AUTHORS.rand.flatten,
    :keywords => 10.times.collect { KEYWORDS.rand }.flatten,
    :rating   => [1, 2, 3, 4, 5].rand,
    :genre    => 2.times.collect { GENRE.rand }.flatten
  }
  Book.new(attributes).save
end
# irb(main):202:0> Benchmark.measure { Book.facet_search("authors" => {"$in" => AUTHORS.rand}) }
# => #<Benchmark::Tms:0x132c07a28 @cstime=0.0, @total=0.0, @cutime=0.0, @label="", @stime=0.0, @real=0.0468628406524658, @utime=0.0>
#
# Benchmark.measure { puts Book.facet_search('rating' => {"$in" => [3,4,5]}, 'keywords' => {'$in' => KEYWORDS.rand}) }
# => #<Benchmark::Tms:0x132d5af38 @cstime=0.0, @total=0.390000000000004, @cutime=0.0, @label="", @stime=0.0199999999999996, @real=3.84707593917847, @utime=0.370000000000005>
# With 1 Million records
# Empty the collection, then insert 1,000,000 books built from the random pools.
Book.collection.remove
1_000_000.times do
  attributes = {
    :title    => TITLES.rand,
    :authors  => AUTHORS.rand.flatten,
    :keywords => 10.times.collect { KEYWORDS.rand }.flatten,
    :rating   => [1, 2, 3, 4, 5].rand,
    :genre    => 2.times.collect { GENRE.rand }.flatten
  }
  Book.new(attributes).save
end
# irb(main):233:0> Benchmark.measure { Book.facet_search("authors" => {"$in" => AUTHORS.rand}) }
# => #<Benchmark::Tms:0x132fc3c48 @cstime=0.0, @total=0.0400000000001981, @cutime=0.0, @label="", @stime=0.00999999999999801, @real=0.588751077651978, @utime=0.0300000000002001>
#
# irb(main):235:0> Benchmark.measure { puts Book.facet_search('rating' => {"$in" => [3,4,5]}, 'keywords' => {'$in' => KEYWORDS.rand}) }
# => #<Benchmark::Tms:0x132702ac8 @cstime=0.0, @total=0.689999999999856, @cutime=0.0, @label="", @stime=0.0300000000000011, @real=43.1640980243683, @utime=0.659999999999854>
require 'rubygems'
require 'mongo_mapper'
# Point MongoMapper at the server on localhost and select the 'books' database.
MongoMapper.connection = Mongo::Connection.new('localhost')
MongoMapper.database = 'books'
# Book document with map/reduce based facet counting over its array fields.
class Book
  include MongoMapper::Document

  # Names of the array-valued keys that are treated as facets.
  CONTEXTS = ['authors', 'rating', 'keywords', 'genre'].freeze

  # Declare one indexed Array key per facet name.
  CONTEXTS.each do |context|
    key context, Array, :index => true
  end

  key :title, String
  key :contexts, Array

  before_create :set_contexts

  # Stores the list of facet names on the document itself; the map function
  # in facet_search reads it back as this.contexts. Registered above as a
  # before_create callback.
  def set_contexts
    # Hand out a copy so that changing one document's list can never alter
    # the shared CONTEXTS constant.
    self.contexts = CONTEXTS.dup
  end

  # Counts, per facet, how often each tag occurs among the matching books.
  #
  # query - Hash passed to map_reduce as its :query option (default {}).
  #
  # Returns a Hash of the form { context => { tag => count } }.
  def self.facet_search(query = {})
    # Map: for every facet name listed on the document, emit each tag stored
    # under that facet together with a { context: 1 } tuple.
    # NOTE(review): the print calls here and in the reduce function look like
    # debug tracing and run for every emitted tag; confirm they are still wanted.
    map = <<-MAP
      function() {
        var that = this;
        this.contexts.forEach(function(context) {
          that[context].forEach(function(tag) {
            print('!!!!!emitting. tag: ' + tag + ', { ' + context +' : 1 }');
            var t = {};
            t[context] = 1;
            emit(tag, t);
          });
        });
      }
    MAP

    # Reduce: add up, per context, all tuples emitted for the same tag.
    reduce = <<-REDUCE
      function(tag, values) {
        var res = {};
        print('!!tag: ' + tag + ' values: ' + tojson(values));
        values.forEach(function(tuple) {
          for (var context in tuple) {
            if (res[context] === undefined) {
              print(tag + ' is undefined for ' + context + ' setting to ' + tuple[context]);
              res[context] = tuple[context];
            } else {
              print(tag + ' is currently ' + res[context] + ' incrementing by ' + tuple[context]);
              res[context] += tuple[context];
            }
          }
        });
        print("returning tag: " + tag + " values: " + tojson(res));
        return res;
      }
    REDUCE

    sort_facets(collection.map_reduce(map, reduce, { :query => query }))
  end

  # Regroups map/reduce output, which is keyed by tag, so it is keyed by
  # context first.
  #
  # t - the object returned by map_reduce; it must respond to find and yield
  #     documents carrying '_id' (the tag) and 'value' (context => count).
  #
  # Returns a Hash of the form { context => { tag => count } }.
  def self.sort_facets(t)
    contexts = {}
    t.find.each do |res|
      tag = res['_id']
      res['value'].each do |ctxt, count|
        tags = (contexts[ctxt] ||= {})
        tags[tag] = (tags[tag] || 0) + count
      end
    end
    contexts
  end
  # A bare `private` has no effect on methods defined with `def self.`, so the
  # helper is hidden explicitly.
  private_class_method :sort_facets
end
# Book.create(:title => 'Jurassic Park', :author => 'Michael Crichton', :authors => ['Michael Crichton'], :genre => ['fiction'], :keywords => ['velociraptor', 'clever girl'], :rating => [4])
# Book.create(:title => 'Sphere', :author => 'Michael Crichton', :authors => ['Michael Crichton'], :genre => ['fiction'], :keywords => ['ocean'], :rating => [5])
# Book.create(:title => 'The Firm', :author => 'John Grisham', :authors => ['John Grisham'], :genre => ['fiction'], :keywords => ['law', 'lawyer'], :rating => [4])
# irb(main):237:0> Book.facet_search("authors" => {"$in" => ['John Grisham']})
# => {"rating"=>{4.0=>1.0}, "genre"=>{"fiction"=>1.0}, "authors"=>{"John Grisham"=>1.0}, "keywords"=>{"law"=>1.0, "lawyer"=>1.0}}
#
# irb(main):241:0> Book.facet_search("authors" => {"$in" => ['Michael Crichton']}, :rating => {"$in" => [5]})
# => {"rating"=>{5.0=>1.0}, "genre"=>{"fiction"=>1.0}, "authors"=>{"Michael Crichton"=>1.0}, "keywords"=>{"ocean"=>1.0}}
# Seed the three sample books, created in the order listed.
[
  { :title => 'Jurassic Park', :author => 'Michael Crichton', :authors => ['Michael Crichton'],
    :genre => ['fiction'], :keywords => ['velociraptor', 'clever girl'], :rating => [4] },
  { :title => 'Sphere', :author => 'Michael Crichton', :authors => ['Michael Crichton'],
    :genre => ['fiction'], :keywords => ['ocean'], :rating => [5] },
  { :title => 'The Firm', :author => 'John Grisham', :authors => ['John Grisham'],
    :genre => ['fiction'], :keywords => ['law', 'lawyer'], :rating => [4] }
].each { |attributes| Book.create(attributes) }
# Map step: for every facet name in the document's contexts list, emit each
# tag stored under that facet together with a { context: 1 } tuple.
# `t` is declared with var so it does not become an implicit global.
map = <<-MAP
function() {
  var that = this;
  this.contexts.forEach(function(context) {
    that[context].forEach(function(tag) {
      print('emitting. tag: ' + tag + ', ' + context +' : 1');
      var t = {};
      t[context] = 1;
      emit(tag, t);
    });
  });
}
MAP
Michael Crichton, { authors : 1 }
4, { rating : 1 }
velociraptor, { keywords : 1 }
clever girl, { keywords : 1 }
fiction, { genre : 1 }
Michael Crichton, { authors : 1 }
5, { rating : 1 }
ocean, { keywords : 1 }
fiction, { genre : 1 }
reduce('Michael Crichton', [ {authors : 1}, {authors : 1} ])
# Reduce step: for one tag, add up per context all the tuples emitted for it.
# `res` and the loop variable `context` are declared with var so they do not
# become implicit globals.
reduce = <<-REDUCE
function(tag, values) {
  var res = {};
  print('tag: ' + tag + ' values: ' + tojson(values));
  values.forEach(function(tuple) {
    for (var context in tuple) {
      if (res[context] === undefined) {
        print(tag + ' is undefined for ' + context + ' setting to ' + tuple[context]);
        res[context] = tuple[context];
      } else {
        print(tag + ' is currently ' + res[context] + ' incrementing by ' + tuple[context]);
        res[context] += tuple[context];
      }
    }
  });
  print("returning tag: " + tag + " values: " + tojson(res));
  return res;
}
REDUCE
5, values: { "rating" : 1 }
4, values: {"rating" : 1 }
Michael Crichton, values: { "authors" : 2 }
clever girl, values: { "keywords" : 1 }
fiction, values: { "genre" : 2 }
ocean, values: { "keywords" : 1 }
velociraptor, values: { "keywords" : 1 }
rating
5 : 1
4 : 1
authors
Michael Crichton: 2
keywords
clever girl : 1
ocean : 1
velociraptor : 1
genre
fiction : 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment