Last active
August 29, 2015 14:07
-
-
Save hyphaltip/c8c00307d699938c87c2 to your computer and use it in GitHub Desktop.
Prepare marker sequence files for CEGMA processing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Copy every "*.fa" FASTA file from an input directory into an output
# directory, rewriting the sequence headers on the way:
#   * a leading "SPECIES|" prefix is removed from each header,
#   * "__AFTOLnnn" is appended to the first header token, where nnn is a
#     per-file sequential locus number,
#   * every '*' character is removed from every line.
#
# Usage: script [input_dir [output_dir]]
#   input_dir   defaults to "marker_files"
#   output_dir  defaults to "marker_hmm" (created if it does not exist)
use strict;
use warnings;

my $dir  = shift || "marker_files";
my $odir = shift || "marker_hmm";

if ( !-d $odir ) {
    mkdir($odir) or die "cannot create $odir: $!";
}

opendir( my $dh, $dir ) or die "cannot open $dir: $!";
# readdir returns entries in an unspecified order; sort them so a given set
# of input files always receives the same AFTOL locus numbers.
my @fasta_files = sort grep { /\S+\.fa$/ } readdir($dh);
closedir($dh);

my $locusct = 1;
for my $file (@fasta_files) {
    my ($gene) = $file =~ /(\S+)\.fa$/;

    # make the AFTOL gene name just sequentially going through the files
    my $aftolname = sprintf( "AFTOL%03d", $locusct++ );

    my $in_path  = "$dir/$file";
    my $out_path = "$odir/$gene.fa";

    # three-argument open: the file name can never be parsed as a mode
    open( my $in,  '<', $in_path )  or die "cannot read $in_path: $!";
    open( my $out, '>', $out_path ) or die "cannot write $out_path: $!";

    while ( my $line = <$in> ) {
        # >SPECIES|GENE  ->  >GENE   (drop the leading 'species prefix')
        $line =~ s/^>([^\|]+)\|/>/;

        # >GENE  ->  >GENE__AFTOL123
        $line =~ s/^>(\S+)/>${1}__$aftolname/;

        # stop codons from AA file, muscle and hmmer don't like these
        $line =~ tr/*//d;

        print {$out} $line or die "cannot write $out_path: $!";
    }

    close($in);
    # buffered write errors only surface at close, so check it
    close($out) or die "cannot close $out_path: $!";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment