#!/usr/bin/perl -l
# bin/solr start -f -s /var/tmp/solr-5944-bench/solr/
# (time ./benchmark-update.pl) 2>&1 | tee log.txt && phone-alert done
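#
# Benchmark of Solr atomic-update overhead (the "5944" above apparently refers
# to SOLR-5944, in-place updates of numeric DocValues): seed the index with
# $max_doc_id full documents, commit once, then run $num_iters rounds of
# $num_updates_per_iter atomic "set" updates against random doc ids, timing
# each phase.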
use strict;
use warnings;
use LWP::UserAgent;
srand(42); # force deterministic set of updates in every run
my $dvo_field = "inplace_dvo_l";
my $stored_field = "stored_l";
my $max_doc_id = 20_000;
my $num_iters = 50;
my $num_updates_per_iter = 5_000; # per iteration & type of update
#my $commit_every_n_updates = 5_000; ##### no commits, no adds ... just atomic updates on DVO
# these "words" will actually be integer values, space separated to require tokenization
my $max_words_in_txt_field = 2000;
my $words_in_vocab = 4000;
my $ua = LWP::UserAgent->new();
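# Index one full document: random values for the docvalues and stored fields,
# plus a random-length "text" field of space-separated integer "words".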
sub do_regular_add {
    my $id = shift;
    print "ADD: $id";
    my $v1 = int(rand(100_000_000));
    my $v2 = int(rand(100_000_000));
    my $words_in_this_doc = int(rand($max_words_in_txt_field));
    my $txt = "";
    map { $txt .= int(rand($words_in_vocab)) . " " } 0..$words_in_this_doc;
    my $data = qq<[{"id":"$id", "$dvo_field":$v1, "$stored_field":$v2, "text":"$txt"}]>;
    my $res = $ua->post('http://localhost:8983/solr/gettingstarted/update',
                        'Content-Type' => 'application/json',
                        'Content' => $data);
    die $res->status_line unless $res->is_success;
}
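# Send an atomic {"set": ...} update for a single field of an existing document.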
sub do_atomic_update {
    my $id = shift;
    my $field = shift;
    print "ATOMIC: $field $id";
    my $v = int(rand(100_000_000));
    my $data = qq<[{"id":"$id", "$field":{"set":$v}}]>;
    my $res = $ua->post('http://localhost:8983/solr/gettingstarted/update',
                        'Content-Type' => 'application/json',
                        'Content' => $data);
    die $res->status_line unless $res->is_success;
}
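# Hard commit and open a new searcher so the updates become visible.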
sub do_commit {
    print "COMMIT";
    my $res = $ua->post('http://localhost:8983/solr/gettingstarted/update?commit=true&openSearcher=true',
                        'Content-Type' => 'application/json',
                        'Content' => "[]");
    die $res->status_line unless $res->is_success;
}
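# Accumulated wall-clock seconds per operation type, reported at the end.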
my %cmd_times = ("add"         => 0,
                 $dvo_field    => 0,
                 $stored_field => 0);
# seed the index with docs
{
    my $start = time();
    print "##### Start Adds @ $start #####";
    for (my $id = 0; $id < $max_doc_id; $id++) {
        do_regular_add($id);
    }
    my $end = time();
    my $total_add_time = $end - $start;
    print "##### End Adds @ $end ($total_add_time) #####";
    $cmd_times{"add"} = $total_add_time;
}
do_commit();
sleep(5);
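# Main benchmark loop: each iteration fires $num_updates_per_iter atomic
# updates at random doc ids and records the elapsed time per field.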
for (my $iter = 0; $iter < $num_iters; $iter++) {
    foreach my $field ($stored_field) { #### only update stored field, looking for 5944 overhead
        # foreach my $field ($dvo_field, $stored_field) {
        my $num_updates = $num_updates_per_iter;
        my $start = time();
        print "##### iter# $iter - Start Updates $field @ $start #####";
        while (0 < $num_updates--) {
            my $id = int(rand($max_doc_id));
            do_atomic_update($id, $field);
        }
        my $end = time();
        my $upd_time = $end - $start;
        print "##### iter# $iter - End Updates $field @ $end ($upd_time) #####";
        $cmd_times{$field} += $upd_time;
        sleep(5);
    }
}
print "";
print "";
print "##### Total times...";
print "";
foreach my $cmd (sort keys %cmd_times) {
    print "### $cmd $cmd_times{$cmd}";
}
print "";
print "";