Skip to content

Instantly share code, notes, and snippets.

@RandallShanePhD
Created July 28, 2014 20:59
Show Gist options
  • Save RandallShanePhD/0d65ac4f222345818b9a to your computer and use it in GitHub Desktop.
Save RandallShanePhD/0d65ac4f222345818b9a to your computer and use it in GitHub Desktop.
Decode and Encode object ids in MongoDB using pymongo
# MongoDB Object ID decode
# Randall Shane, PhD
# RShane@BaseXVI.com
# BaseXVI.com
# 28 July 2014
'''MongoDB creates a unique id (the '_id' key) for every document.
It is a 12-byte value composed of 4 parts, as follows:
time stamp: 4 bytes (seconds since the Unix epoch)
machine id: 3 bytes (MD5 hash of host name/mac)
process id: 2 bytes (first 2 bytes of the PID)
unique id: 3 byte integer value
Decode and encode as follows.'''
from bson import objectid
from datetime import datetime
from pymongo import MongoClient
# Module-level handles used by the demo under ``__main__``:
# a client for the local mongod, the 'test' database, and its
# 'testData' collection.
client = MongoClient(host='localhost', port=27017)
db = client.test
coll = db.testData
def decode(oid):
    """Split a MongoDB ObjectId into its four components.

    ``oid`` is passed to ``objectid.ObjectId`` and the result is
    stringified; the hex string is then sliced by fixed offsets.

    Output is a dict containing:
    { oid: (str) Original Object ID,
    ets: (int) Epoch TimeStamp,
    fts: (str) Formatted TimeStamp ('%d %b %Y %H:%M:%S', local time),
    mid: (int) Machine ID,
    pid: (int) Process ID,
    uid: (int) Unique ID }

    'fts' is best-effort: it is left out of the dict when the platform
    cannot convert the epoch value to a datetime.

    The dict is also printed for display before being returned.
    """
    out = {}
    # Round-trip through ObjectId so that the slicing below always works
    # on the id's string form rather than on whatever type was passed in.
    hex_id = str(objectid.ObjectId(oid))
    out['oid'] = hex_id
    # Two hex digits per byte: 4 + 3 + 2 + 3 bytes -> 8 + 6 + 4 + 6 digits.
    out['ets'] = int(hex_id[0:8], 16)
    out['mid'] = int(hex_id[8:14], 16)
    out['pid'] = int(hex_id[14:18], 16)
    out['uid'] = int(hex_id[18:], 16)
    try:
        # fromtimestamp() without a tz argument yields local time.
        stamp = datetime.fromtimestamp(out['ets'])
        out['fts'] = stamp.strftime('%d %b %Y %H:%M:%S')
    except (ValueError, OverflowError, OSError):
        # Only the "timestamp not representable on this platform" errors
        # are ignored; anything else (e.g. KeyboardInterrupt) propagates.
        pass
    print(out)  # for display
    return out
def encode(data):
    """Build the 24-character hex form of an ObjectId from its parts.

    Input is a dict of the 4 components
    { ets: (int) Epoch TimeStamp (4 bytes),
    mid: (int) Machine ID (3 bytes),
    pid: (int) Process ID (2 bytes),
    uid: (int) Unique ID (3 bytes) }

    Every component is zero-padded to its full width, so the result is
    always 24 hex digits. The string is printed for display and returned.

    Raises KeyError if a component is missing and ValueError if a
    component is negative or too large for its field.
    """
    # (key, width in hex digits) in the order the fields appear in the id.
    layout = (('ets', 8), ('mid', 6), ('pid', 4), ('uid', 6))
    parts = []
    for key, width in layout:
        value = data[key]
        if not 0 <= value < 16 ** width:
            raise ValueError('%s=%r does not fit in %d bytes'
                             % (key, value, width // 2))
        # format() instead of hex(): no '0x' prefix to strip, no trailing
        # 'L' for Python 2 longs, and the padding is applied in one step.
        parts.append('{0:0{1}x}'.format(value, width))
    oid = ''.join(parts)
    print(oid)  # for display
    return oid
if __name__ == '__main__':
    # Decode on a collection created in the MongoDB shell by:
    # for (var i = 0; i <= 5; i++) db.testData.insert( { x : i } )
    print('DECODE:')
    # Only '_id' is projected; a plain loop is used because decode() is
    # called for its printed output, not to collect results.
    for doc in coll.find({}, {'_id': 1}):
        decode(doc['_id'])
    # Encode test creating ObjectId using specific information
    print('ENCODE:')
    data = {'ets': 1406557108, 'mid': 9999, 'pid': 8888, 'uid': 7777}
    encode(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment