Skip to content

Instantly share code, notes, and snippets.

@zakame
Created August 30, 2013 08:34
Show Gist options
  • Save zakame/6387625 to your computer and use it in GitHub Desktop.
Save zakame/6387625 to your computer and use it in GitHub Desktop.
Benchmarking a few ways of splitting some lines and inserting into a hash
#!/usr/bin/env perl
use warnings;
use strict;
use Benchmark qw(:all);
use Text::CSV_XS;
# Input fixture shared by every benchmark candidate below: a file with
# 1_000_000 entries of 'test|N,xxxN|noob' where N = number.  The candidates
# only open it for reading (and die if that fails), so the file has to exist
# before this script is run.
my $file = '/tmp/test.txt';
# Benchmark candidate: the step-by-step variant.
#
# Reads $file one record at a time, splits the record on '|', splits the
# second field on ',' and stores the raw record (line ending included) in a
# local hash under the first comma-separated piece.  The hash is discarded
# when the sub returns; only the work of building it is of interest.
#
# Dies if $file cannot be opened.
my $readable_split = sub {
    my %record_for;

    open( my $in, '<', $file )
        or die "Can't open $file: $!";

    while ( defined( my $record = <$in> ) ) {

        # First pass: every '|'-separated column of the record.
        my @columns = split /\|/, $record;

        # Second pass: the parentheses force list assignment, so only the
        # leading comma-separated piece of column 1 is kept.
        my ($id) = split /\,/, $columns[1];

        $record_for{$id} = $record;
    }
};
# Benchmark candidate: the single-expression variant.
#
# Reads $file one record at a time and derives the hash key with two nested
# split calls combined through list slices, without naming the intermediate
# field list.  The raw record (line ending included) is stored in a local
# hash that is discarded when the sub returns.
#
# Dies if $file cannot be opened.
my $cascaded_split = sub {
    my %record_for;

    open( my $in, '<', $file )
        or die "Can't open $file: $!";

    while ( defined( my $record = <$in> ) ) {

        # Inner split: columns on '|', slice [1] picks the second column.
        # Outer split: pieces on ',', slice [0] picks the first piece.
        my $id = ( split /,/, ( split /\|/, $record )[1] )[0];

        $record_for{$id} = $record;
    }
};
# Benchmark candidate: the Text::CSV_XS variant.
#
# Parses $file with a Text::CSV_XS parser that uses '|' as the field
# separator, splits the second parsed field on ',' and keys a local hash on
# the first piece.  The stored value is the parsed fields re-joined with '|',
# not the raw input line.  The hash is discarded when the sub returns.
#
# Dies if the parser cannot be constructed, if $file cannot be opened, or if
# the parser stopped because of a parse error rather than end of input.
my $csv_xs_split = sub {
    my %h;
    my $csv = Text::CSV_XS->new( { sep_char => '|' } )
        or die "Cannot use CSV: ", Text::CSV_XS->error_diag;
    open my $fh, '<', $file
        or die "Can't open $file: $!";
    while ( my $line = $csv->getline($fh) ) {

        # Parentheses force list assignment: keep only the first piece.
        my ($key) = split /,/ => $line->[1];
        $h{$key} = join '|' => @$line;
    }

    # getline() returns false on a parse error as well as at end of input,
    # so without this check a malformed record would silently truncate the
    # run and the benchmark would time a partial parse.  The error code is
    # inspected instead of eof() because eof() is still true when the very
    # last line fails to parse.  Code 2012 is the module's plain
    # "end of data" condition and is not a failure.
    my ( $code, $message ) = $csv->error_diag;
    die "CSV parse error in $file: $code - $message"
        if $code and $code != 2012;

    return;
};
# Benchmark driver: every candidate is timed for 10 iterations against the
# same input file, and the collected timings are then printed as a
# comparison chart.
my %candidate_for = (
    readable_split => $readable_split,
    cascaded_split => $cascaded_split,
    csv_xs_split   => $csv_xs_split,
);

my $results = timethese( 10, \%candidate_for );

cmpthese($results);
__END__
zakame@yorozuya:/tmp% perl bench_hash.pl
Benchmark: timing 10 iterations of cascaded_split, csv_xs_split, readable_split...
cascaded_split: 21 wallclock secs (21.01 usr + 0.13 sys = 21.14 CPU) @ 0.47/s (n=10)
csv_xs_split: 49 wallclock secs (48.62 usr + 0.12 sys = 48.74 CPU) @ 0.21/s (n=10)
readable_split: 27 wallclock secs (26.90 usr + 0.10 sys = 27.00 CPU) @ 0.37/s (n=10)
s/iter csv_xs_split readable_split cascaded_split
csv_xs_split 4.87 -- -45% -57%
readable_split 2.70 81% -- -22%
cascaded_split 2.11 131% 28% --
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment