Last active
December 11, 2015 23:18
-
-
Save draegtun/4675254 to your computer and use it in GitHub Desktop.
Faster fasta.pl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use 5.016; | |
use warnings; | |
# See: http://news.ycombinator.com/item?id=5141179 | |
# | |
# This fasta.pl is a port of fasta.rb and is 2.6 times quicker than original alioth fasta.pl | |
use constant IM => 139968; | |
use constant IA => 3877; | |
use constant IC => 29573; | |
my $LAST = 42; | |
my $alu = | |
'GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG' . | |
'GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA' . | |
'CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT' . | |
'ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA' . | |
'GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG' . | |
'AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC' . | |
'AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA'; | |
my $iub = [ | |
[ 'a', 0.27 ], | |
[ 'c', 0.12 ], | |
[ 'g', 0.12 ], | |
[ 't', 0.27 ], | |
[ 'B', 0.02 ], | |
[ 'D', 0.02 ], | |
[ 'H', 0.02 ], | |
[ 'K', 0.02 ], | |
[ 'M', 0.02 ], | |
[ 'N', 0.02 ], | |
[ 'R', 0.02 ], | |
[ 'S', 0.02 ], | |
[ 'V', 0.02 ], | |
[ 'W', 0.02 ], | |
[ 'Y', 0.02 ] | |
]; | |
my $homosapiens = [ | |
[ 'a', 0.3029549426680 ], | |
[ 'c', 0.1979883004921 ], | |
[ 'g', 0.1975473066391 ], | |
[ 't', 0.3015094502008 ] | |
]; | |
sub make_repeat_fasta { | |
my ($src, $n) = @_; | |
my $width = qr/(.{1,60})/; | |
my $l = length $src; | |
my $s = $src x (($n / $l) + 1); | |
substr($s, $n, $l) = ''; | |
while ($s =~ m/$width/g) { say $1 } | |
# say for unpack '(a60)*', $s; # slower than above over larger strings | |
} | |
sub make_random_fasta { | |
my ($table, $n) = @_; | |
my $rand = undef; | |
my $width = 60; | |
my $prob = 0.0; | |
$_->[1] = ($prob += $_->[1]) for @$table; | |
my $collector = '$rand = ($LAST = ($LAST * IA + IC) % IM) / IM;'; | |
$collector .= "print('$_->[0]') && next if $_->[1] > \$rand;" for @$table; | |
my $code = q{ | |
for (1..($n / $width)) { | |
for (1..$width) { !C! } | |
print "\n"; | |
} | |
if ($n % $width != 0) { | |
for (1 .. $n % $width) { !C! } | |
print "\n"; | |
} | |
}; | |
$code =~ s/!C!/$collector/g; | |
eval $code; | |
} | |
my $n = $ARGV[0] || 27; | |
say ">ONE Homo sapiens alu"; | |
make_repeat_fasta($alu, $n*2); | |
say ">TWO IUB ambiguity codes"; | |
make_random_fasta($iub, $n*3); | |
say ">THREE Homo sapiens frequency"; | |
make_random_fasta($homosapiens, $n*5); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment