public
Last active

Faster fasta.pl

  • Download Gist
fasta.pl
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
use 5.016;
use warnings;
 
# See: http://news.ycombinator.com/item?id=5141179
#
# This fasta.pl is a port of fasta.rb and is 2.6 times quicker than original alioth fasta.pl
 
use constant IM => 139968;
use constant IA => 3877;
use constant IC => 29573;
 
my $LAST = 42;
 
my $alu =
'GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG' .
'GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA' .
'CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT' .
'ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA' .
'GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG' .
'AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC' .
'AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA';
 
my $iub = [
[ 'a', 0.27 ],
[ 'c', 0.12 ],
[ 'g', 0.12 ],
[ 't', 0.27 ],
[ 'B', 0.02 ],
[ 'D', 0.02 ],
[ 'H', 0.02 ],
[ 'K', 0.02 ],
[ 'M', 0.02 ],
[ 'N', 0.02 ],
[ 'R', 0.02 ],
[ 'S', 0.02 ],
[ 'V', 0.02 ],
[ 'W', 0.02 ],
[ 'Y', 0.02 ]
];
 
my $homosapiens = [
[ 'a', 0.3029549426680 ],
[ 'c', 0.1979883004921 ],
[ 'g', 0.1975473066391 ],
[ 't', 0.3015094502008 ]
];
 
sub make_repeat_fasta {
my ($src, $n) = @_;
my $width = qr/(.{1,60})/;
my $l = length $src;
my $s = $src x (($n / $l) + 1);
substr($s, $n, $l) = '';
 
while ($s =~ m/$width/g) { say $1 }
# say for unpack '(a60)*', $s; # slower than above over larger strings
}
 
sub make_random_fasta {
my ($table, $n) = @_;
my $rand = undef;
my $width = 60;
my $prob = 0.0;
 
$_->[1] = ($prob += $_->[1]) for @$table;
 
my $collector = '$rand = ($LAST = ($LAST * IA + IC) % IM) / IM;';
$collector .= "print('$_->[0]') && next if $_->[1] > \$rand;" for @$table;
 
my $code = q{
for (1..($n / $width)) {
for (1..$width) { !C! }
print "\n";
}
if ($n % $width != 0) {
for (1 .. $n % $width) { !C! }
print "\n";
}
};
$code =~ s/!C!/$collector/g;
eval $code;
}
 
 
 
my $n = $ARGV[0] || 27;
 
say ">ONE Homo sapiens alu";
make_repeat_fasta($alu, $n*2);
 
say ">TWO IUB ambiguity codes";
make_random_fasta($iub, $n*3);
 
say ">THREE Homo sapiens frequency";
make_random_fasta($homosapiens, $n*5);

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.