Last active
August 29, 2015 14:11
-
-
Save kikyousky/e8c9312cebdd9f2044ec to your computer and use it in GitHub Desktop.
Utility functions for MongoDB sharding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymongo | |
# Shared client used by every helper in this file.
# NOTE(review): presumably this address is a mongos router, since the helpers
# issue sharding admin commands through it -- confirm.
mongoh = pymongo.MongoClient(host='localhost', port=27030)
def enable_sharding(db):
    """Enable sharding for database *db*, tolerating "already enabled".

    Runs the ``enableSharding`` admin command through the module-level
    ``mongoh`` client.

    Args:
        db: name of the database to enable sharding on.

    Raises:
        pymongo.errors.OperationFailure: if the command fails and the error
            text does not contain ``'already enabled'``.
    """
    try:
        mongoh.admin.command('enableSharding', db)
    except pymongo.errors.OperationFailure as e:
        # "except X as e" parses on Python 2.6+ and Python 3; the older
        # comma form is a syntax error on Python 3.
        if 'already enabled' not in str(e):
            raise
def drop_database(db):
    """Drop the database named *db* through the module-level ``mongoh`` client.

    Args:
        db: name of the database to drop.
    """
    # PyMongo's Database object has no dropDatabase() method: attribute access
    # on a Database returns a Collection, so the shell-style call could never
    # drop anything. MongoClient.drop_database is the supported API.
    mongoh.drop_database(db)
def drop_collection(db, collection):
    """Drop *collection* from database *db* via the module-level client."""
    database = mongoh[db]
    target = database[collection]
    target.drop()
def get_shards():
    """Return the ``_id`` of every document in ``config.shards`` as a list."""
    return [shard_doc['_id'] for shard_doc in mongoh.config.shards.find()]
def _get_chunks(db, collection):
    """Group the ``config.chunks`` documents of ``db.collection`` by shard.

    Returns:
        dict mapping each chunk document's ``'shard'`` value to the list of
        chunk documents that carry it. Shards with no matching chunk are
        absent from the result.
    """
    namespace = '%s.%s' % (db, collection)
    by_shard = {}
    for chunk_doc in mongoh.config.chunks.find({'ns': namespace}):
        by_shard.setdefault(chunk_doc['shard'], []).append(chunk_doc)
    return by_shard
def manual_balance(db, collection):
    """Move chunks of ``db.collection`` until no shard is below the average.

    Repeatedly takes one chunk from the shard that currently holds the most
    and issues a ``moveChunk`` admin command sending it to the shard that
    holds the fewest. Stops once the smallest shard holds at least
    ``total_chunks // number_of_shards`` chunks, then re-reads the chunk
    layout and prints the per-shard chunk counts.

    Args:
        db: database name.
        collection: collection name.

    Raises:
        ZeroDivisionError: if ``get_shards()`` returns no shards.
    """
    namespace = '%s.%s' % (db, collection)
    chunks = _get_chunks(db, collection)
    shards = get_shards()
    # Shards that own no chunk yet must still take part as move targets.
    for sh in shards:
        chunks.setdefault(sh, [])

    total_chunk_num = sum(len(owned) for owned in chunks.values())
    # Explicit floor division: the stop condition below relies on an integer
    # target. With true division (Python 3 "/") a remainder would keep the
    # smallest shard below the average forever and the loop would never end.
    average_chunk_num = total_chunk_num // len(shards)

    # The per-shard lists are mutated in place by the loop, so the
    # (shard, chunk list) pairs only need to be built once.
    counts = list(chunks.items())
    while True:
        min_shard = min(counts, key=lambda x: len(x[1]))
        if len(min_shard[1]) >= average_chunk_num:
            break
        max_shard = max(counts, key=lambda x: len(x[1]))
        print("move one chunk from %s to %s" % (max_shard[0], min_shard[0]))
        chunk_to_move = max_shard[1].pop()
        # The chunk is identified to moveChunk by its lower bound.
        mongoh.admin.command("moveChunk", namespace,
                             find=chunk_to_move['min'], to=min_shard[0])
        min_shard[1].append(chunk_to_move)
    print("balance finish")

    # Report the layout as recorded in config.chunks after the moves.
    chunks = _get_chunks(db, collection)
    for ch in chunks:
        print("%s %s" % (ch, len(chunks[ch])))
def shard_collection(db, collection, predrop=False, presplit=False,
                     shard_key={"_id": 1}, chunk_partition_values=[]):
    """Shard ``db.collection`` and optionally pre-split it into chunks.

    Args:
        db: database name.
        collection: collection name.
        predrop: when true, drop the collection before sharding it.
        presplit: when true, and the collection is empty and the shard key is
            not hashed, split at every value of *chunk_partition_values* and
            then spread the chunks with ``manual_balance``.
        shard_key: key document passed to the ``shardCollection`` command.
            Only read here, never mutated.
        chunk_partition_values: split points, each passed as ``middle`` to
            the ``split`` command. Only read here, never mutated.

    Raises:
        IndexError: if *shard_key* is empty.
    """
    namespace = '%s.%s' % (db, collection)
    if predrop:
        drop_collection(db, collection)

    # find_one() returning None is an emptiness check that works on every
    # PyMongo release; Collection.count() was removed in PyMongo 4.
    is_empty = mongoh[db][collection].find_one() is None
    # list(...) keeps the first-value lookup working on Python 3, where
    # dict.values() is a view and cannot be indexed.
    is_key_hashed = str(list(shard_key.values())[0]).lower() == "hashed"

    enable_sharding(db)
    mongoh.admin.command('shardCollection', namespace, key=shard_key)

    if not presplit:
        return
    if not is_empty:
        print("collection is not empty, skip presplit")
        return
    if is_key_hashed:
        print("If you shard an empty collection using a hashed shard key, MongoDB will automatically create and migrate chunks")
        print("no need to do presplit")
        return

    # presplit collection, then move empty chunks around cluster
    for v in chunk_partition_values:
        mongoh.admin.command('split', namespace, middle=v)
    manual_balance(db, collection)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment