#!/usr/bin/perl -l
# bin/solr start -f -s /var/tmp/solr-5944-bench/solr/
# (time ./benchmark-update.pl) 2>&1 | tee log.txt && phone-alert done
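#
# Benchmark of Solr atomic-update overhead (the "5944" above apparently refers
# to SOLR-5944, in-place updates of numeric DocValues): seed the index with
# $max_doc_id full documents, commit once, then run $num_iters rounds of
# $num_updates_per_iter atomic "set" updates against random doc ids, timing
# each phase.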
use strict;
use warnings;
use LWP::UserAgent;
srand(42); # force deterministic set of updates in every run
my $dvo_field = "inplace_dvo_l";
my $stored_field = "stored_l";
my $max_doc_id = 20_000;
my $num_iters = 50;
my $num_updates_per_iter = 5_000; # per iteration & type of update
#my $commit_every_n_updates = 5_000; ##### no commits, no adds ... just atomic updates on DVO
# these "words" will actually be integer values, space separated to require tokenization
my $max_words_in_txt_field = 2000;
my $words_in_vocab = 4000;
my $ua = LWP::UserAgent->new();
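# Index one full document: random values for the docvalues and stored fields,
# plus a random-length "text" field of space-separated integer "words".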
sub do_regular_add {
    my $id = shift;
    print "ADD: $id";
    my $v1 = int(rand(100_000_000));
    my $v2 = int(rand(100_000_000));
    my $words_in_this_doc = int(rand($max_words_in_txt_field));
    my $txt = "";
    map { $txt .= int(rand($words_in_vocab)) . " " } 0..$words_in_this_doc;
    my $data = qq<[{"id":"$id", "$dvo_field":$v1, "$stored_field":$v2, "text":"$txt"}]>;
    my $res = $ua->post('http://localhost:8983/solr/gettingstarted/update',
                        'Content-Type' => 'application/json',
                        'Content' => $data);
    die $res->status_line unless $res->is_success;
}
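# Send an atomic {"set": ...} update for a single field of an existing document.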
sub do_atomic_update {
    my $id = shift;
    my $field = shift;
    print "ATOMIC: $field $id";
    my $v = int(rand(100_000_000));
    my $data = qq<[{"id":"$id", "$field":{"set":$v}}]>;
    my $res = $ua->post('http://localhost:8983/solr/gettingstarted/update',
                        'Content-Type' => 'application/json',
                        'Content' => $data);
    die $res->status_line unless $res->is_success;
}
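# Hard commit and open a new searcher so the updates become visible.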
sub do_commit {
    print "COMMIT";
    my $res = $ua->post('http://localhost:8983/solr/gettingstarted/update?commit=true&openSearcher=true',
                        'Content-Type' => 'application/json',
                        'Content' => "[]");
    die $res->status_line unless $res->is_success;
}
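# Accumulated wall-clock seconds per operation type, reported at the end.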
my %cmd_times = ("add"         => 0,
                 $dvo_field    => 0,
                 $stored_field => 0);
# seed the index with docs
{
    my $start = time();
    print "##### Start Adds @ $start #####";
    for (my $id = 0; $id < $max_doc_id; $id++) {
        do_regular_add($id);
    }
    my $end = time();
    my $total_add_time = $end - $start;
    print "##### End Adds @ $end ($total_add_time) #####";
    $cmd_times{"add"} = $total_add_time;
}
do_commit();
sleep(5);
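# Main benchmark loop: each iteration fires $num_updates_per_iter atomic
# updates at random doc ids and records the elapsed time per field.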
for (my $iter = 0; $iter < $num_iters; $iter++) {
    foreach my $field ($stored_field) { #### only update stored field, looking for 5944 overhead
        # foreach my $field ($dvo_field, $stored_field) {
        my $num_updates = $num_updates_per_iter;
        my $start = time();
        print "##### iter# $iter - Start Updates $field @ $start #####";
        while (0 < $num_updates--) {
            my $id = int(rand($max_doc_id));
            do_atomic_update($id, $field);
        }
        my $end = time();
        my $upd_time = $end - $start;
        print "##### iter# $iter - End Updates $field @ $end ($upd_time) #####";
        $cmd_times{$field} += $upd_time;
        sleep(5);
    }
}
print "";
print "";
print "##### Total times...";
print "";
foreach my $cmd (sort keys %cmd_times) {
    print "### $cmd $cmd_times{$cmd}";
}
print "";
print "";