Skip to content

Instantly share code, notes, and snippets.

@ajdavis
Created April 2, 2012 21:38
Show Gist options
  • Save ajdavis/2287410 to your computer and use it in GitHub Desktop.
Save ajdavis/2287410 to your computer and use it in GitHub Desktop.
1M doc find() benchmark
import java.util.Calendar;
import java.util.TimeZone;
import java.util.List;
import java.util.Set;
import java.util.ArrayList;
import com.mongodb.Mongo;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.DBCursor;
/**
 * Times how long it takes to iterate the documents of {@code testdb.test}
 * for one year, querying one month at a time in ascending {@code date}
 * order and stopping once a document's {@code val} reaches the limit.
 */
public class Benchmark
{
    private static final String HOST = "127.0.0.1";
    private static final int PORT = 27017;
    private static final int YEAR = 2010;
    /** Counting stops at the first document whose {@code val} is not below this. */
    private static final long VAL_LIMIT = 1000000;

    /**
     * Counts documents, month by month, until one with
     * {@code val >= VAL_LIMIT} is seen or the year is exhausted.
     *
     * @return the number of documents visited before the limit was hit,
     *         or 0 if any exception occurred (the exception is printed
     *         to standard error)
     */
    private long count() {
        Mongo m = null;
        try {
            m = new Mongo(HOST, PORT);
            DB db = m.getDB("testdb");
            DBCollection coll = db.getCollection("test");
            long count = 0;
            // Query a year's worth of data, one month at a time.
            for (int month = 0; month < 12; month++) {
                Calendar start = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
                start.set(YEAR, month, 1, 0, 0, 0);
                // set(y, m, d, h, mi, s) leaves MILLISECOND at "now"; zero it
                // so the range starts exactly at midnight and documents
                // stamped 00:00:00.000 on the 1st are not skipped.
                start.set(Calendar.MILLISECOND, 0);
                Calendar end = (Calendar) start.clone();
                end.add(Calendar.MONTH, 1);

                BasicDBObject query = new BasicDBObject();
                query.put("date",
                        new BasicDBObject("$gte", start.getTime()).append("$lt", end.getTime()));

                DBCursor cur = coll.find(query).sort(new BasicDBObject("date", 1));
                try {
                    while (cur.hasNext()) {
                        DBObject obj = cur.next();
                        Object raw = obj.get("val");
                        if (!(raw instanceof Number)) {
                            throw new IllegalStateException(
                                    "Document " + obj.get("_id")
                                    + " has no numeric 'val' field: " + raw);
                        }
                        // Accept any numeric BSON type (int32, int64, double).
                        if (((Number) raw).longValue() >= VAL_LIMIT) {
                            return count;
                        }
                        count++;
                    }
                } finally {
                    // Release the server-side cursor, including on early return.
                    cur.close();
                }
            }
            return count;
        }
        catch (Exception e) {
            System.err.println("Exception: " + e);
            return 0;
        }
        finally {
            if (m != null) {
                m.close();
            }
        }
    }

    /** Runs {@link #count()} once and prints the count and elapsed seconds. */
    private void run()
    {
        long t1 = System.currentTimeMillis();
        long c = this.count();
        long t2 = System.currentTimeMillis();
        float diff = (float)(t2 - t1) / 1000;
        System.out.println("Found " + c + " docs in " + diff + " seconds");
    }

    public static void main(String[] args)
    {
        Benchmark app = new Benchmark();
        app.run();
    }
}
import time
import pymongo
import datetime
import bson
# Refuse to run unless bson reports that its C extension is available
# (presumably so the timing reflects C-accelerated BSON decoding).
assert bson.has_c()
# Connect with pymongo's defaults and select the testdb.test collection.
conn = pymongo.Connection()
coll = conn.testdb.test
print 'Collection size is: %d' % (coll.count())
# Year whose documents are scanned by foo().
year = 2010
# Number of documents visited so far; incremented by foo().
count = 0
# Start of the timed section; the elapsed time is computed from this later.
t1 = time.time()
#
# Query a year's worth of data, one month at a time.
#
def foo():
    """Walk the year's documents month by month, counting into the global ``count``.

    Each month is fetched with its own range query on ``date``, sorted
    ascending.  Every document whose ``val`` is below 1000000 bumps
    ``count``; the first document that is not below it ends the scan.
    """
    global count
    for month in range(1, 13):
        start = datetime.datetime(year, month, 1)
        # December's range ends at January 1st of the following year.
        end = (datetime.datetime(year + 1, 1, 1) if month == 12
               else datetime.datetime(year, month + 1, 1))
        query = {'date': {'$gte': start, '$lt': end}}
        for doc in coll.find(query).sort([('date', 1)]):
            if not doc['val'] < 1000000:
                return
            count += 1
# Run the scan; it accumulates its result in the global `count`.
foo()
# End of the timed section.
t2 = time.time()
# Report the number of documents visited and the elapsed wall-clock seconds.
print 'Found %d docs in %s seconds' % (count, t2 - t1)
# 4 docs per minute, 240 per hour, 5760 per day, 2.1 M per year
import datetime
import pymongo
import time
# Rebuild the benchmark data set: drop testdb, index test.date, then insert
# one batch of synthetic documents per calendar day of the year.
c = pymongo.Connection()
c.drop_database('testdb')
db = c.testdb

year = 2010
docno = 0  # running document number, stored as each document's 'val'
start = time.time()

db.test.ensure_index([('date', 1)])

for month in range(1, 13):
    month_docs = 0
    month_start = time.time()
    dt = datetime.datetime(year, month, 1)
    print(dt.isoformat(' '))

    while True:
        # Generate documents for the day: 4 per minute, numbered
        # consecutively starting from the current docno.
        docs = [
            {
                'date': dt + datetime.timedelta(minutes=minutes),
                'str0': 'a' * 100,
                'str1': 'b' * 100,
                'val': docno + 4 * minutes + i,
            }
            for minutes in range(60 * 24)
            for i in range(4)
        ]
        docno += len(docs)

        db.test.insert(docs, safe=True)
        month_docs += len(docs)

        try:
            # Stepping past the month's last day raises ValueError.
            dt = dt.replace(day=dt.day + 1)
        except ValueError:
            # Next month
            break

    print('inserted %s docs in %.1f seconds' % (
        month_docs, time.time() - month_start
    ))

print('inserted %s docs total in %.1f seconds' % (
    docno, time.time() - start
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment