Create a gist now

Instantly share code, notes, and snippets.

@rocky /.gitignore
Last active Aug 29, 2015

What would you like to do?
perl cr-nl-benchmark.pl
Rate vep_comp_o_if vep_comp_o vep vep_none_comp_o vep_none vep_none_comp_o_if vep_C vep_C_none
vep_comp_o_if 635/s -- -19% -20% -62% -83% -87% -89% -90%
vep_comp_o 787/s 24% -- -0% -52% -79% -84% -87% -87%
vep 789/s 24% 0% -- -52% -79% -84% -87% -87%
vep_none_comp_o 1653/s 160% 110% 109% -- -57% -66% -72% -73%
vep_none 3802/s 499% 383% 382% 130% -- -21% -35% -37%
vep_none_comp_o_if 4808/s 657% 511% 509% 191% 26% -- -18% -21%
vep_C 5848/s 821% 643% 641% 254% 54% 22% -- -4%
vep_C_none 6061/s 855% 670% 668% 267% 59% 26% 4% --
Notes
_comp - compiled regular expressions. For example: my $nl = qr/\n/;
_none - the input contains no \n or \r characters, so s/\n//g; has nothing to remove. For example: "now is the time for all"
_o - o flag on search. For example: s/\n//og;
_C - Inline::C for compiled C code
#!/usr/bin/perl
# Benchmark several ways of stripping "\n" and "\r" from a string.
#
# Case names are built from these parts:
#   vep    - plain s{\r}{}g / s{\n}{}g with literal patterns
#   _none  - the input contains no "\n" or "\r" at all
#   _comp  - patterns precompiled with qr//
#   _o     - the /o flag on the match or substitution
#   _if    - a single m{[\n\r]} test guards the two substitutions
#   _C     - strip_crnl() from the __C__ section, built by Inline::C
#
# Each case repeats its work 1001 times per Benchmark iteration so that
# the per-call overhead of the benchmark harness is negligible.
use strict;
use warnings;
use Inline 'C';
use Benchmark qw(:all);

# my $Iterations = 100_000;
my $iterations = 10_000;

# Precompiled patterns shared by the "_comp" cases.
my $nl   = qr/\n/;
my $cr   = qr/\r/;
my $crnl = qr/[\n\r]/;

cmpthese($iterations, {
    'vep' => sub {
        for (0 .. 1000) {
            my $data = "now\nis\r\n the time\nfor\rall";
            $data =~ s{\r}{}g;
            $data =~ s{\n}{}g;
        }
    },
    'vep_none' => sub {
        for (0 .. 1000) {
            my $data = "now is the time for all";
            $data =~ s{\r}{}g;
            $data =~ s{\n}{}g;
        }
    },
    'vep_C' => sub {
        for (0 .. 1000) {
            # strip_crnl() rewrites its argument's buffer in place, so give
            # it a fresh copy each time instead of a string literal; this is
            # also how Bio::DB::Fasta calls it ($data = strip_crnl($data)).
            my $data = "now\nis\r\n the time\nfor\rall";
            $data = strip_crnl($data);
        }
    },
    'vep_C_none' => sub {
        for (0 .. 1000) {
            my $data = "nowis the timeforall";
            $data = strip_crnl($data);
        }
    },
    'vep_comp_o' => sub {
        for (0 .. 1000) {
            my $data = "now\nis\r\n the time\nfor\rall";
            $data =~ s{$nl}{}go;
            $data =~ s{$cr}{}go;
        }
    },
    'vep_none_comp_o' => sub {
        for (0 .. 1000) {
            my $data = "now is the time for all";
            # The /o flag was missing here although the case name (and its
            # sibling vep_comp_o) says it is part of what is measured.
            $data =~ s{$nl}{}go;
            $data =~ s{$cr}{}go;
        }
    },
    'vep_comp_o_if' => sub {
        for (0 .. 1000) {
            my $data = "now\nis\r\n the time\nfor\rall";
            if ($data =~ m{$crnl}o) {
                $data =~ s{$cr}{}og;
                $data =~ s{$nl}{}og;
            }
        }
    },
    'vep_none_comp_o_if' => sub {
        for (0 .. 1000) {
            my $data = "now is the time for all";
            if ($data =~ m{$crnl}o) {
                $data =~ s{$cr}{}og;
                $data =~ s{$nl}{}og;
            }
        }
    },
});
__END__
__C__
/* Strip all newline (\n) and carriage return (\r) characters from the
 * NUL-terminated string str, compacting the remaining characters in place.
 *
 * str:     writable, NUL-terminated buffer; may be NULL.
 * returns: str itself (NULL when str is NULL).
 *
 * The buffer is rewritten in place, so callers must not pass string
 * literals or other read-only storage.
 */
char *strip_crnl(char* str) {
    char *src;  /* read cursor */
    char *dst;  /* write cursor; never runs ahead of src */

    if (str == NULL) {
        return NULL;
    }

    dst = str;
    for (src = str; *src; src++) {
        if (*src != '\n' && *src != '\r') {
            *dst++ = *src;
        }
    }
    *dst = '\0';
    return str;
}
diff --git a/DB/Fasta.pm b/DB/Fasta.pm
index 40fac61..b49f882 100644
--- a/DB/Fasta.pm
+++ b/DB/Fasta.pm
@@ -402,6 +402,47 @@ disclaimers of warranty.
#'
package Bio::DB::Fasta;
+# Compiling the below regular expressions speeds up the Pure Perl
+# seq/subseq() by about 7% from 7.76s to 7.22s over 32358 calls on
+# Variant Effect Prediction data.
+my $nl = qr/\n/;
+my $cr = qr/\r/;
+
+sub strip_crnl {
+    my $str = shift;    # lexical copy: never clobber the caller's $_
+    # The following two s/// statements can take a significant portion
+    # of time in Variant Effect Prediction. To speed things up we
+    # compile the match portion.
+    # print "FOO\n"; # uncomment this to show which routine is called.
+    $str =~ s/$nl//g;
+    $str =~ s/$cr//g;
+    return $str;
+}
+
+# C can perform strip_crnl much faster, but this requires the
+# Inline::C module, which we don't require people to have. So we make
+# this optional by wrapping the C code in an eval. If the eval works,
+# the Perl strip_crnl() function is overwritten.
+eval q{
+ use Inline C => <<'END_OF_C_CODE';
+ /* Strip all new line (\n) and carriage return (\r) characters
+ from string str, in place. Returns str.
+ */
+ char* strip_crnl(char* str) {
+ char *s;
+ char *s2 = str;
+ for (s = str; *s; s++) {
+ if (*s != '\n' && *s != '\r') {
+ *s2++ = *s;
+ }
+ }
+ *s2 = '\0';
+ return str;
+ }
+END_OF_C_CODE
+};
+
+
BEGIN {
@AnyDBM_File::ISA = qw(DB_File GDBM_File NDBM_File SDBM_File)
}
@@ -924,8 +965,7 @@ sub subseq {
seek($fh,$filestart,0);
read($fh,$data,$filestop-$filestart+1);
- $data =~ s/\n//g;
- $data =~ s/\r//g;
+ $data = strip_crnl($data);
if ($reversed) {
$data = reverse $data;
$data =~ tr/gatcGATC/ctagCTAG/;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment