Skip to content

Instantly share code, notes, and snippets.

@nickjacob
Created March 13, 2014 08:07
Show Gist options
  • Save nickjacob/9524028 to your computer and use it in GitHub Desktop.
Save nickjacob/9524028 to your computer and use it in GitHub Desktop.
Merge a series of LevelDB databases into a single output database containing a specified number of entries.
#!/usr/bin/env perl
use strict;
use warnings;
use Tie::LevelDB;
# Walk every key/value pair of a LevelDB database, handing each pair to a
# callback.
#
# Parameters:
#   $db_name - database name, passed straight to Tie::LevelDB::DB->new
#   $block   - code ref invoked as $block->($key, $value) for each entry;
#              a return value numerically equal to 0 stops the walk
#
# Returns 1 if the callback stopped the walk early, 0 if the iterator was
# exhausted.
sub foreach_key {
    my ($db_name, $block) = @_;

    # Direct method call rather than the indirect-object "new Class(...)"
    # form, which is ambiguous to the parser.
    my $db = Tie::LevelDB::DB->new($db_name);
    my $it = $db->NewIterator;

    for ($it->SeekToFirst; $it->Valid; $it->Next) {
        return 1 if $block->($it->key, $it->value) == 0;
    }

    return 0;
}
# Script entry point: runs only when this file is executed directly (no
# caller), not when it is loaded from other code.
#
# Usage: <output count> <output db name> [source...]
#
# Copies key/value pairs from each source database, in the order given, into
# the output database until <output count> pairs have been written or the
# sources are exhausted.
unless (caller) {
    my ($count, $output, @dbs) = @ARGV;

    # The count must be a positive integer. A negative, fractional or
    # non-numeric value could never equal the running total, which would
    # silently disable the limit and copy every source in full.
    my $count_ok = defined $count && $count =~ /\A\+?[0-9]+\z/ && $count > 0;
    if (!$count_ok || !$output) {
        print STDERR "usage: <output count> <output db name> [source...]\n";
        exit 2;
    }

    my $out_db  = Tie::LevelDB::DB->new($output);
    my $current = 0;    # number of Put calls issued so far

    foreach my $db (@dbs) {
        print "working with: $db\n";

        # NOTE(review): every Put is counted, so a key that appears in more
        # than one source is counted each time it is written - confirm
        # whether the limit is meant to apply to distinct keys instead.
        my $ret = foreach_key($db, sub {
            my ($k, $v) = @_;
            $out_db->Put($k, $v);

            # Returning 0 tells foreach_key to stop iterating.
            return (++$current >= $count) ? 0 : 1;
        });

        if ($ret == 1) {
            print "finished - enough data!\n";
            last;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment