Skip to content

Instantly share code, notes, and snippets.

@kikyousky
Last active August 29, 2015 14:11
Show Gist options
  • Save kikyousky/e8c9312cebdd9f2044ec to your computer and use it in GitHub Desktop.
Save kikyousky/e8c9312cebdd9f2044ec to your computer and use it in GitHub Desktop.
Utility functions for MongoDB sharding
import pymongo
# Shared client used by every helper in this file; connects to localhost:27030.
# NOTE(review): the helpers issue sharding admin commands and read the
# ``config`` database, so this is presumably a mongos router -- confirm.
mongoh = pymongo.MongoClient('localhost',27030)
def enable_sharding(db):
    """Enable sharding for database *db*, tolerating a repeated call.

    Runs the ``enableSharding`` admin command through the module-level
    ``mongoh`` client.

    Raises:
        pymongo.errors.OperationFailure: re-raised unless the error text
            contains 'already enabled'.
    """
    try:
        mongoh.admin.command('enableSharding', db)
    except pymongo.errors.OperationFailure as e:
        # Only the "already enabled" failure is benign; everything else
        # propagates to the caller unchanged.
        if 'already enabled' not in str(e):
            raise
def drop_database(db):
    """Drop the database named *db* via the module-level ``mongoh`` client."""
    # PyMongo exposes database removal on the client. ``mongoh[db].dropDatabase``
    # would resolve to a Collection named "dropDatabase", and calling it
    # raises TypeError instead of dropping anything.
    mongoh.drop_database(db)
def drop_collection(db, collection):
    """Drop *collection* from database *db* using the shared ``mongoh`` client."""
    target = mongoh[db][collection]
    target.drop()
def get_shards():
    """Return a list with the ``_id`` of every document in ``config.shards``."""
    return [shard_doc['_id'] for shard_doc in mongoh.config.shards.find()]
def _get_chunks(db, collection):
    """Group the ``config.chunks`` documents of ``db.collection`` by shard.

    Returns:
        dict mapping each ``shard`` value found in the matching chunk
        documents to the list of those documents. Shards that own no chunk
        of this namespace do not appear in the result.
    """
    namespace = '%s.%s' % (db, collection)
    by_shard = {}
    for chunk_doc in mongoh.config.chunks.find({'ns': namespace}):
        by_shard.setdefault(chunk_doc['shard'], []).append(chunk_doc)
    return by_shard
def manual_balance(db, collection):
    """Move chunks of ``db.collection`` until every shard holds at least
    the floor of the average chunk count.

    The current layout is read with ``_get_chunks`` and the shard list with
    ``get_shards``. On each pass one chunk is moved (``moveChunk`` admin
    command) from the shard holding the most chunks to the shard holding
    the fewest. When finished, the layout is re-read and one
    "<shard> <chunk count>" line is printed per shard.

    Raises:
        ZeroDivisionError: if ``get_shards()`` returns no shards.
    """
    namespace = '%s.%s' % (db, collection)
    chunks = _get_chunks(db, collection)
    shards = get_shards()
    # Shards that own no chunk yet must still take part as move targets.
    for sh in shards:
        chunks.setdefault(sh, [])
    total_chunk_num = sum(len(owned) for owned in chunks.values())
    # Floor division on purpose: with true division (Python 3 ``/``) a
    # non-integral average could never be reached by the smallest shard and
    # the loop would shuttle one chunk back and forth forever.
    average_chunk_num = total_chunk_num // len(shards)
    while True:
        min_name, min_chunks = min(chunks.items(), key=lambda kv: len(kv[1]))
        if len(min_chunks) >= average_chunk_num:
            break
        max_name, max_chunks = max(chunks.items(), key=lambda kv: len(kv[1]))
        print("move one chunk from %s to %s" % (max_name, min_name))
        chunk_to_move = max_chunks.pop()
        # ``find`` takes the chunk's lower bound to identify which chunk moves.
        mongoh.admin.command("moveChunk", namespace, find=chunk_to_move['min'], to=min_name)
        # Keep the local bookkeeping in step with the move just issued.
        min_chunks.append(chunk_to_move)
    print("balance finish")
    # Re-read the layout from the config database to report the actual result.
    chunks = _get_chunks(db, collection)
    for ch in chunks:
        print("%s %s" % (ch, len(chunks[ch])))
def shard_collection(db, collection, predrop=False, presplit=False, shard_key=None, chunk_partition_values=None):
    """Shard ``db.collection`` and optionally pre-split it into chunks.

    Args:
        db: database name.
        collection: collection name.
        predrop: when true, drop the collection before sharding it.
        presplit: when true, split the (empty, non-hashed) collection at
            ``chunk_partition_values`` and spread the chunks with
            ``manual_balance``.
        shard_key: shard key specification passed as ``key`` to the
            ``shardCollection`` command; defaults to ``{"_id": 1}``.
        chunk_partition_values: values passed one by one as ``middle`` to
            the ``split`` command; defaults to no split points.

    Pre-splitting is skipped (with a printed message) when the collection
    already holds documents or when the shard key is hashed.
    """
    # ``None`` sentinels avoid sharing one mutable default across calls.
    if shard_key is None:
        shard_key = {"_id": 1}
    if chunk_partition_values is None:
        chunk_partition_values = []
    namespace = '%s.%s' % (db, collection)

    if predrop:
        drop_collection(db, collection)
    # Emptiness has to be sampled before sharding; ``find_one`` works on
    # every PyMongo release, whereas ``Collection.count`` was removed in 4.0.
    is_empty = mongoh[db][collection].find_one() is None
    # ``dict.values()`` is not indexable on Python 3, so take the first
    # value through an iterator instead.
    first_key_spec = next(iter(shard_key.values()), None)
    is_key_hashed = str(first_key_spec).lower() == "hashed"

    enable_sharding(db)
    mongoh.admin.command('shardCollection', namespace, key=shard_key)

    if not presplit:
        return
    if not is_empty:
        print("collection is not empty, skip presplit")
        return
    if is_key_hashed:
        print("If you shard an empty collection using a hashed shard key, MongoDB will automatically create and migrate chunks")
        print("no need to do presplit")
        return

    # presplit collection, then move empty chunks around cluster
    for v in chunk_partition_values:
        mongoh.admin.command('split', namespace, middle=v)
    manual_balance(db, collection)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment