Created
May 9, 2015 10:14
-
-
Save MattOates/77b425166a3acc8dd314 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data file used by every benchmark below; the URL is the one given for it:
# https://raw.githubusercontent.com/kevina/wordlist/master/pos/part-of-speech.txt
# The whole file is read into memory once, up front.
my $input = 'part-of-speech.txt'.IO.slurp;

# Shared result table; each benchmark fills it and bench() clears it first.
my %words;
# Run one benchmark and print how many entries it produced and how long it took.
#
#   $name - label printed in front of the result line
#   &code - the benchmark body; expected to populate the file-level %words
#
# %words is emptied before the body runs, so the printed count reflects only
# this run. Elapsed time is rounded to the nearest 0.1 second.
sub bench($name, &code) {
    %words = ();
    my $t0 = now;
    code();
    my $elapsed = (now - $t0).round(0.1);
    my $count   = +%words;
    say "$name: $count words parsed in {$elapsed}s";
}
# Benchmark: hand-rolled scanner written directly against NQP ops.
# Walks the slurped text once, locating the tab that ends each key and the
# newline that ends each value, and stores the pair in %words.
bench 'nqp', {
    use nqp;
    my str $s     = $input;
    my int $pos   = 0;
    my int $chars = nqp::chars($s);
    while $pos < $chars {
        my int $tab = nqp::index($s, "\t", $pos);
        # nqp::index signals "not found" with -1. Without a separator there is
        # no further key/value pair, so stop instead of slicing with bogus
        # offsets (which could also reset $pos and loop forever).
        last if $tab < 0;
        my int $nl = nqp::index($s, "\n", $tab);
        # A final line lacking a trailing newline extends to the end of input.
        $nl = $chars if $nl < 0;
        %words{nqp::p6box_s(nqp::substr($s, $pos, $tab - $pos))} =
            nqp::p6box_s(nqp::substr($s, $tab + 1, $nl - $tab - 1));
        $pos = $nl + 1;
    }
}
# Benchmark: the same single-pass scanner as the 'nqp' variant, but using the
# ordinary Str methods (chars / index / substr) with native-typed locals.
bench 'native p6', {
    my str $s     = $input;
    my int $pos   = 0;
    my int $chars = $s.chars;
    while $pos < $chars {
        # Str.index returns an undefined value when nothing is found, which
        # cannot be assigned to a native int; map "not found" to -1 and stop,
        # since no further key/value pair exists.
        my int $tab = $s.index("\t", $pos) // -1;
        last if $tab < 0;
        # A final line lacking a trailing newline extends to the end of input.
        my int $nl = $s.index("\n", $tab) // $chars;
        %words{$s.substr($pos, $tab - $pos)} =
            $s.substr($tab + 1, $nl - $tab - 1);
        $pos = $nl + 1;
    }
}
# Benchmark: split the input into lines, then cut each line at its first tab
# with index/substr. Text before the tab is the key, text after it the value.
bench 'eager lines substr', {
    for $input.lines(:eager) -> $line {
        my $sep = $line.index("\t");
        %words{$line.substr(0, $sep)} = $line.substr($sep + 1);
    }
}
# Benchmark: split the input into lines, then split each line on its first
# tab (limit 2) into a key and the remainder of the line.
bench 'eager lines split', {
    for $input.lines(:eager) -> $line {
        my ($word, $rest) = $line.split("\t", 2);
        %words{$word} = $rest;
    }
}
# Benchmark: the straightforward one-liner approach. Unlike the other
# variants this one reads the file itself, so its timing includes file I/O.
#
# Each line is turned into an explicit key => value pair. Assigning the raw
# list of split results to the hash only works where that list is flattened
# implicitly; where it is not, whole split results end up as keys and values.
# The limit of 2 keeps any further tabs inside the value, matching the
# 'eager lines split' variant.
bench 'utterly naive p6', {
    %words = 'part-of-speech.txt'.IO.lines.map: {
        my ($word, $rest) = .split("\t", 2);
        $word => $rest
    };
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
nqp: 295172 words parsed in 1.7s
native p6: 295172 words parsed in 6.5s
eager lines substr: 295172 words parsed in 7.8s
eager lines split: 295172 words parsed in 161.3s
utterly naive p6: 295172 words parsed in 546.1s
The naive approach (what I did first in Perl 6) is about 320x slower than the NQP approach, and the difference in peak memory usage is about as bad.
Weirdly, the native Perl 6 benchmark is not very different from how split("\t") operates; the main difference I can see is the use of a while loop rather than map {} in the Rakudo source (https://github.com/rakudo/rakudo/blob/nom/src/core/Str.pm#L871). This might be the cause of the big difference?