Skip to content

Instantly share code, notes, and snippets.

@greatfireball
Created June 26, 2016 20:29
Show Gist options
  • Save greatfireball/171807f737c5a42ac62ff50ce09be159 to your computer and use it in GitHub Desktop.
Save greatfireball/171807f737c5a42ac62ff50ce09be159 to your computer and use it in GitHub Desktop.
Simple FASTA file generator with three sequences sharing a single motif
use strict;
use warnings;
#srand(20160624);
my $srand_val = srand();
printf STDERR "Initialized random generator using %d\n", $srand_val;
my $seq_length = 10000;
my $percentage_a = 0.25;
my $percentage_c = 0.25;
my $percentage_g = 0.25;
my $percentage_t = 1-($percentage_a + $percentage_c + $percentage_g);
if ($percentage_t>1 || $percentage_t < 0)
{
die sprintf("Should contain at least some Ts but percentage is strange %f\n", $percentage_t);
}
my $percentage_motif = 0.15;
printf STDERR "Percentage A/C/G/T: %.0f/%.0f/%.0f/%.0f %%\n", map {$_*100} ($percentage_a, $percentage_c, $percentage_g, $percentage_t);
my $motif_len = int($percentage_motif * $seq_length);
my $motif_pos = rand($seq_length-$motif_len);
printf STDERR "Motif will be at position %d with a length of %d bp\n", $motif_pos, $motif_len;
my $s = "";
while (length($s)<$seq_length)
{
if (length($s)%250==0)
{
print STDERR length($s),"...";
}
my $val=rand();
if ($val < $percentage_a)
{
$s.="A";
} elsif ($val < $percentage_a+$percentage_c)
{
$s.="C";
} elsif ($val < $percentage_a+$percentage_c+$percentage_g)
{
$s.="G";
} else
{
$s.="T"
}
}
print STDERR "\n\nExtracting sequence parts...\n";
my $s_before_motif = substr($s, 0, $motif_pos);
my $s_motif = substr($s, $motif_pos, $motif_len);
my $s_after_motif = substr($s, $motif_pos+$motif_len);
printf STDERR "Total length of parts is %d (%d+%d+%d)\n", length($s_before_motif.$s_motif.$s_after_motif), length($s_before_motif), length($s_motif), length($s_after_motif);
printf ">normal_motif_at_%d_len_%d\n%s\n", $motif_pos, $motif_len, $s;
printf ">reverse_complement\n%s\n", fisher_yates_shuffle($s_after_motif).reverse_complement($s_motif).fisher_yates_shuffle($s_before_motif);
printf ">same_orientation\n%s\n", fisher_yates_shuffle($s_before_motif).$s_motif.fisher_yates_shuffle($s_after_motif);
# from http://www.perlmonks.org/?node_id=1869
sub fisher_yates_shuffle
{
my $seq = shift;
my @array = split //, $seq;
my $i = @array;
while ( --$i )
{
my $j = int rand( $i+1 );
@array[$i,$j] = @array[$j,$i];
}
return join("", @array);
}
sub reverse_complement
{
my $seq = shift;
$seq =~ tr/ACGT/TGCA/;
$seq = scalar reverse $seq;
return $seq;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment