Last active
August 29, 2015 14:02
-
-
Save bio-boris/91d7e627af46b451d8be to your computer and use it in GitHub Desktop.
Grab Only Genes from Trinity Assembly FASTA Outp http://seqanswers.com/forums/showthread.php?t=43749
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Author : bio-boris | |
# Date : May 30, 2014 | |
use strict; | |
use warnings; | |
my $input = "example.fasta"; | |
open F, $input or die $!; | |
#Load sequences into memory | |
my %sequences; | |
my $current_header; | |
while( my $line = <F>){ | |
chomp $line; | |
if(substr($line,0,1) eq ">"){ | |
$current_header = $line; | |
} | |
else{ | |
if(not defined $sequences{$current_header}){ | |
$sequences{$current_header} = $line; | |
} | |
else{ | |
$sequences{$current_header} .= $line; | |
} | |
} | |
} | |
#Now get the longest sequence | |
my %sequence_lengths; | |
foreach my $header(sort keys %sequences){ | |
my @full_id = split /_/ , $header; | |
my $id = $full_id[0]; | |
my $current_sequence_length = length $sequences{$header}; | |
if(not defined $sequence_lengths{$id}){ | |
$sequence_lengths{$id} = $header; | |
} | |
else{ #Check to see if the sequence length is greater than the length of the sequence we already have stored | |
my $stored_sequence_header = $sequence_lengths{$id}; | |
my $stored_sequence_length = length $sequences{$stored_sequence_header}; | |
if($stored_sequence_length < $current_sequence_length){ | |
$sequence_lengths{$id} = $header; | |
} | |
} | |
} | |
#Now print out the stored sequences | |
foreach my $id(sort keys %sequence_lengths){ | |
my $header = $sequence_lengths{$id}; | |
my $sequence = $sequences{$header}; | |
# print STDERR "For $id, the longest sequence we found was $header \n"; | |
print join "\n", $header,$sequence; | |
print "\n"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment