Created
January 23, 2017 00:21
-
-
Save chatman/c085af9d1ea9d9e2c56d037e0c6ab119 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -l
#
# The -l switch above makes every print end with a newline; the print
# statements in this script do not add one themselves.
#
# How to run (author's notes):
#   bin/solr start -f -s /var/tmp/solr-5944-bench/solr/
#   (time ./benchmark-update.pl) 2>&1 | tee log.txt && phone-alert done

use strict;
use warnings;
use LWP::UserAgent;

# Force a deterministic set of updates in every run.
srand(42);

# Field names interpolated into the JSON payloads sent to the update handler.
my $dvo_field    = 'inplace_dvo_l';
my $stored_field = 'stored_l';

# Documents are seeded with ids 0 .. $max_doc_id - 1.
my $max_doc_id = 20_000;

# Number of timed update rounds, and the number of atomic updates issued in
# each round per type of update.
my $num_iters            = 50;
my $num_updates_per_iter = 5_000;

#my $commit_every_n_updates = 5_000; ##### no commits, no adds ... just atomic updates on DVO

# The "words" of the text field are actually integer values, space separated
# to require tokenization.
my $max_words_in_txt_field = 2000;
my $words_in_vocab         = 4000;

# One HTTP client shared by every request this script makes.
my $ua = LWP::UserAgent->new;
# Index one freshly generated document under the given id.
#
# Arguments:
#   $id - document id; interpolated verbatim into the JSON "id" value.
#
# The document carries a random integer in $dvo_field and in $stored_field,
# plus a "text" value built from space-separated random integers below
# $words_in_vocab. The range used to build the text starts at 0, so a
# document holds between 1 and $max_words_in_txt_field words.
#
# Dies, naming the document and the HTTP status line, when the POST does not
# succeed.
sub do_regular_add {
    my ($id) = @_;
    print "ADD: $id";

    # The order of the rand() calls is significant: together with the fixed
    # srand() seed it determines the exact data generated on every run.
    my $v1                = int(rand(100_000_000));
    my $v2                = int(rand(100_000_000));
    my $words_in_this_doc = int(rand($max_words_in_txt_field));

    # Build the text in one expression instead of using map for its side
    # effects. Every word, including the last one, is followed by a space.
    my $txt = join '',
        map { int(rand($words_in_vocab)) . ' ' } 0 .. $words_in_this_doc;

    my $data = qq<[{"id":"$id", "$dvo_field":$v1, "$stored_field":$v2, "text":"$txt"}]>;
    my $res  = $ua->post(
        'http://localhost:8983/solr/gettingstarted/update',
        'Content-Type' => 'application/json',
        'Content'      => $data,
    );
    die "ADD $id failed: " . $res->status_line unless $res->is_success;
}
# Send an atomic "set" update for a single field of a single document.
#
# Arguments:
#   $id    - id of the document to update
#   $field - name of the field that receives a new random integer value
#
# Dies with the HTTP status line when the POST does not succeed.
sub do_atomic_update {
    my ($id, $field) = @_;
    print "ATOMIC: $field $id";

    my $new_value = int(rand(100_000_000));
    my $payload   = sprintf('[{"id":"%s", "%s":{"set":%s}}]',
                            $id, $field, $new_value);

    my $response = $ua->post(
        'http://localhost:8983/solr/gettingstarted/update',
        'Content-Type' => 'application/json',
        'Content'      => $payload,
    );
    $response->is_success or die $response->status_line;
}
# Ask the update handler to commit and open a new searcher by posting an
# empty JSON array with commit=true&openSearcher=true on the URL.
#
# Dies with the HTTP status line when the POST does not succeed.
sub do_commit {
    print "COMMIT";

    my $response = $ua->post(
        'http://localhost:8983/solr/gettingstarted/update?commit=true&openSearcher=true',
        'Content-Type' => 'application/json',
        'Content'      => '[]',
    );
    $response->is_success or die $response->status_line;
}
# Seconds spent per kind of command: "add" for the seeding phase, plus one
# entry per field that may receive atomic updates.
my %cmd_times = (
    'add'         => 0,
    $dvo_field    => 0,
    $stored_field => 0,
);

# Seed the index with documents whose ids are 0 .. $max_doc_id - 1, timing
# the whole phase with one-second resolution.
{
    my $seed_started = time();
    print "##### Start Adds @ $seed_started #####";

    for my $doc_id (0 .. $max_doc_id - 1) {
        do_regular_add($doc_id);
    }

    my $seed_finished = time();
    my $seed_elapsed  = $seed_finished - $seed_started;
    print "##### End Adds @ $seed_finished ($seed_elapsed) #####";
    $cmd_times{'add'} = $seed_elapsed;
}

# Commit the seeded documents, then pause before the update phase starts.
do_commit();
sleep(5);
# Timed update phase: $num_iters rounds, each issuing $num_updates_per_iter
# atomic updates per field against randomly chosen document ids.
for my $iter (0 .. $num_iters - 1) {
    for my $field ($stored_field) {    #### only update stored field, looking for 5944 overhead
    # for my $field ($dvo_field, $stored_field) {
        my $round_started = time();
        print "##### iter# $iter - Start Updates $field @ $round_started #####";

        for my $update_no (1 .. $num_updates_per_iter) {
            # Ids are drawn from the same 0 .. $max_doc_id - 1 range that was seeded.
            my $doc_id = int(rand($max_doc_id));
            do_atomic_update($doc_id, $field);
        }

        my $round_finished = time();
        my $round_elapsed  = $round_finished - $round_started;
        print "##### iter# $iter - End Updates $field @ $round_finished ($round_elapsed) #####";
        $cmd_times{$field} += $round_elapsed;

        # The pause happens after the end time is taken, so it is not part
        # of the measured interval.
        sleep(5);
    }
}
# Final report: two blank lines, a heading, a blank line, one line per entry
# of %cmd_times in sorted key order, then two more blank lines.
print for ('', '', '##### Total times...', '');

for my $cmd (sort keys %cmd_times) {
    my $seconds = $cmd_times{$cmd};
    print "### $cmd $seconds";
}

print '' for 1 .. 2;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment