Skip to content

Instantly share code, notes, and snippets.

@kgorman
Created May 17, 2011 20:38
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save kgorman/977336 to your computer and use it in GitHub Desktop.
Save kgorman/977336 to your computer and use it in GitHub Desktop.
Data Density Example Script
// data density example
// shows how storing/sorting data by key can reduce I/O drastically in MongoDB
// $diskLoc gives the file # and offset of a given document; divide the offset by 512 for the block #
// 2011 kcg
// start clean: drop both test collections, unorganized first, then organized
["disktest_noorg", "disktest_org"].forEach(function (collectionName) {
    db[collectionName].drop();
});
// create some random data in non userid dense form
// Inserts 100000 documents into disktest_noorg. Each document gets a random
// userid in [0, 50000) and a random imageid in [0, 100), so documents that
// share a userid are inserted in no particular order relative to each other.
// hex_md5 is not defined in this script; it is expected to be supplied by the
// mongo shell environment.
for (var i = 0; i < 100000; i++) {
    // Declared locally: the original assigned to an undeclared `x`, which
    // created an implicit global (and is an error in strict mode).
    var doc = {
        userid: Math.floor(Math.random() * 50000),
        imageid: Math.floor(Math.random() * 100),
        img: "www.kennygorman.com/foo.jpg",
        title: "This is a sample title",
        data: hex_md5("this is a sample piece of data just for fun " + i)
    };
    db.disktest_noorg.insert(doc);
}
// create indexes: a descending index on userid for each of the two collections
["disktest_noorg", "disktest_org"].forEach(function (collectionName) {
    db[collectionName].ensureIndex({userid: -1});
});
// now make a sorted/dense by userid version
// Copies every document from disktest_noorg into disktest_org in descending
// userid order, so documents with the same userid are inserted back to back.
// The cursor is walked with forEach instead of being indexed like an array
// (length()/[i]), which avoids materializing the whole result set in memory
// before the first insert.
db.disktest_noorg.find().sort({userid: -1}).forEach(function (doc) {
    db.disktest_org.insert(doc);
});
// Prints "<userid> <block #>" for the first 20 documents returned by the
// given collection, where block # is the document's $diskLoc offset expressed
// in 512-unit blocks.
//   collection - a shell collection object such as db.disktest_noorg
function printBlockNumbers(collection) {
    var BLOCK_SIZE = 512;
    collection.find({}, {'$diskLoc': 1, 'userid': 1}).limit(20).showDiskLoc().forEach(function (doc) {
        // floor, not round: rounding would report offsets in the upper half
        // of a block as belonging to the following block.
        var block = Math.floor(doc.$diskLoc.offset / BLOCK_SIZE);
        printjson(doc.userid + " " + block);
    });
}
// take a peek at where things live unorganized
printBlockNumbers(db.disktest_noorg);
// take a peek where things live organized
printBlockNumbers(db.disktest_org);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment