Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
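# Generate the mongo shell commands used to preshard (pre-split) a
# collection in MongoDB and distribute the resulting chunks across shards.
# The commands are only printed to STDOUT; nothing is sent to a server.
#
# See:
# http://www.mongodb.org/display/DOCS/Splitting+Chunks
# http://www.mongodb.org/display/DOCS/Moving+Chunks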
# ranges (defaults; every setting below can be overridden on the command line)
my $min_id     = 1;
my $max_id     = 2**31;
my $num_shards = 3;

# namespace stuff
my $db           = 'archive';
my $collection   = 'postings';
my $shard_key    = 'PostingID';
my $shard_prefix = 'archive';

# sizes
my $avg_doc_size = 2200;                 # bytes
my $chunk_size   = 200 * 1024 * 1024;    # bytes; 200MB, noted by the original
                                         # author as mongo's default -- confirm
                                         # for your server version

# let's plan for the future a bit too (increase the id range by 20%)
my $growth = 1.20;

GetOptions(
    'min-id=i'       => \$min_id,
    'max-id=i'       => \$max_id,
    'num-shards=i'   => \$num_shards,
    'db=s'           => \$db,
    'collection=s'   => \$collection,
    'shard-key=s'    => \$shard_key,
    'shard-prefix=s' => \$shard_prefix,
    'avg-doc-size=i' => \$avg_doc_size,
    'chunk-size=i'   => \$chunk_size,
    'growth=f'       => \$growth,
) or die "usage: $0 [--min-id N] [--max-id N] [--num-shards N] [--db NAME]\n"
       . "       [--collection NAME] [--shard-key FIELD] [--shard-prefix NAME]\n"
       . "       [--avg-doc-size BYTES] [--chunk-size BYTES] [--growth FACTOR]\n";

# Reject values that would otherwise crash or hang the generator:
# a zero shard count is an illegal modulus, a zero document size is a
# division by zero, and a zero step would make the emit loop run forever.
die "--num-shards must be at least 1\n"    if $num_shards < 1;
die "--avg-doc-size must be positive\n"    if $avg_doc_size <= 0;
die "--growth must be positive\n"          if $growth <= 0;

# figure out num items per chunk based on above
my $ids_per_chunk = int($chunk_size / $avg_doc_size);
die "--chunk-size must hold at least one document of --avg-doc-size bytes\n"
    if $ids_per_chunk < 1;

# spit out info
print "// $db.$collection sharded on $shard_key, $ids_per_chunk per chunk\n";
print "use admin;\n";

# apply the growth headroom to the upper bound of the id range
$max_id = int($max_id * $growth);

# emit the commands: one split point every $ids_per_chunk ids, starting one
# step above $min_id, with the chunk at each split point assigned to the
# shards in round-robin order
my $count = 0;
for (
    my $split_id = $min_id + $ids_per_chunk;
    $split_id <= $max_id;
    $split_id += $ids_per_chunk
    )
{
    split_chunk($split_id);
    move_chunk($split_id, $count % $num_shards);
    $count++;
}

print "// $count total chunks\n";    # 27,033 with the defaults above
exit;
# Print the mongo shell command that splits $db.$collection at the given
# shard-key value:
#
#   db.runCommand({ split: "$db.$collection",
#                   middle: { $shard_key: $id } })
#
# Takes the shard-key value to split at. The command goes to the currently
# selected output handle, followed by ";" and a newline.
sub split_chunk {
    my $split_point = shift;
    my $command = sprintf 'db.runCommand({split: "%s.%s", middle: { %s: %s } })',
        $db, $collection, $shard_key, $split_point;
    print $command . ";\n";
}
# Print the mongo shell command that moves the chunk containing the given
# shard-key value to a shard:
#
#   db.runCommand({ moveChunk: "$db.$collection",
#                   find: { $shard_key: $id },
#                   to: "$shard" })
#
# Takes the shard-key value identifying the chunk and a zero-based shard
# number, which shard_to_name() turns into the destination shard's name.
sub move_chunk {
    my ($chunk_id, $shard_index) = @_;
    my $command = sprintf
        'db.runCommand({moveChunk: "%s.%s", find: { %s: %s }, to: "%s"})',
        $db, $collection, $shard_key, $chunk_id, shard_to_name($shard_index);
    print $command . ";\n";
}
# Turn a zero-based shard number into a shard name: $shard_prefix followed
# by the one-based number, zero-padded to three digits (0 -> "<prefix>001").
sub shard_to_name {
    my $index = shift;
    return sprintf '%s%03d', $shard_prefix, $index + 1;
}
__END__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.