Last active
September 3, 2018 09:21
-
-
Save MattOates/8628865a3952588b42f463f1fb50f056 to your computer and use it in GitHub Desktop.
Perl 5 times
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ perl6 -e 'srand(2); my $f = "genome.fa".IO.open(:w); while $f.tell < 150_000_000 { $f.put(">" ~ flat("A".."Z", "a".."z", "0".."9", "_", "-").roll((5..7).pick).join); $f.put(<A C G T>.roll(80).join()) for ^(3..16).pick }'
$ time perl genome.pl < genome.fa
real 0m2.019s
user 0m1.709s
sys 0m0.280s
$ cat genome.pl
use strict;
use warnings;

# Parser state shared between parse_header() and the main read loop.
my %sequences;                # sequence ID => concatenated sequence lines
my $current_seqid;            # ID of the record currently being read
my $current_comment;          # description text from that record's header, if any
my $current_sequence = '';    # sequence text accumulated for the current record

# parse_header($header)
#
# Splits one FASTA header line into its ID and its optional description and
# stores them in $current_seqid and $current_comment.
#
#   $header - the raw header line; a leading '>' and a trailing line ending
#             (LF or CRLF) are tolerated and removed before splitting.
#
# Returns the list ($current_seqid, $current_comment).  The comment is undef
# when the header carries no description.  When $header itself is undef
# (nothing could be read), both variables are reset to undef and an empty
# list is returned.
sub parse_header {
    my ($header) = @_;

    if (!defined $header) {
        ($current_seqid, $current_comment) = (undef, undef);
        return;
    }

    # The first header of a file is handed over unchomped, so strip the line
    # ending here; this is a no-op for lines that were already chomped.
    $header =~ s/\r?\n\z//;

    # The '>' only marks the line as a header; it is not part of the ID.
    $header =~ s/\A>//;

    # Limit of 2: everything after the first run of whitespace is the
    # description and must stay in one piece.
    ($current_seqid, $current_comment) = split /\s+/, $header, 2;

    # split returns an empty list for an empty string; keep the ID defined so
    # it can be used as a hash key without warnings.
    $current_seqid = '' if !defined $current_seqid;

    return ($current_seqid, $current_comment);
}
# Read FASTA records from the files named in @ARGV (or STDIN) and fill
# %sequences with one entry per record: header ID => concatenated sequence
# lines.  A record whose ID repeats an earlier one replaces that entry.

# Deal with first sequence: the first line of the input is taken as a header.
my $first_header = <>;

# Guard against empty input so parse_header() is never handed undef.
if (defined $first_header) {
    # Strip the line ending so it cannot leak into the first ID or comment.
    chomp $first_header;
    parse_header($first_header);
    $current_sequence = '';

    while (my $line = <>) {
        chomp $line;
        if ($line =~ /^>/) {
            # A new header closes the record read so far.
            $sequences{$current_seqid} = $current_sequence;
            parse_header($line);
            $current_sequence = '';
        } else {
            $current_sequence .= $line;
        }
    }

    # No header follows the final record, so it has to be stored here;
    # without this the last sequence of the input is silently dropped.
    $sequences{$current_seqid} = $current_sequence;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment