Last active
August 29, 2015 14:06
-
-
Save hyphaltip/8cb679d9bae4c5610ca2 to your computer and use it in GitHub Desktop.
Extract isolate metadata from accession names
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!env perl | |
use strict; | |
use warnings; | |
use Bio::DB::GenBank; | |
use Bio::SeqIO; | |
use Getopt::Long; | |
# remote retrieval of sequences from GenBank | |
my $db = Bio::DB::GenBank->new; | |
my $cache = 'cache'; | |
mkdir($cache) unless -d $cache; | |
my $debug = 0; | |
GetOptions( | |
'c|cache:s' => \$cache, | |
'v|debug!' => \$debug, | |
); | |
my $infile = shift || 'test.fa'; # provide filename on cmdline | |
open(my $fh => $infile) || die $!; | |
# Input will be just | |
# AY315388 | |
# AY561198 | |
my @ids; | |
while(<$fh>) { | |
my ($id) = split; | |
push @ids, $id; | |
} | |
my $out = Bio::SeqIO->new(-format => 'genbank'); | |
for my $id (@ids ) { | |
my $cache_file = File::Spec->catfile($cache,"$id.gbk"); | |
my $seq; | |
if( ! -f $cache_file ) { | |
$seq = $db->get_Seq_by_acc($id); | |
if( $seq ) { | |
Bio::SeqIO->new(-format => 'genbank', | |
-file =>">".$cache_file)->write_seq($seq); | |
} else { | |
warn("cannot find seq $seq\n"); | |
} | |
} else { | |
$seq = Bio::SeqIO->new(-format => 'genbank', | |
-file => $cache_file)->next_seq; | |
} | |
print $id, "\n"; | |
my $ann = $seq->annotation(); | |
foreach my $ref ( $ann->get_Annotations('reference') ) { | |
next if $ref->title =~ /Direct Submission/i; | |
print " REF TITLE: ",$ref->title,"\n"; | |
} | |
print " Features:\n"; | |
for my $feature ( $seq->get_SeqFeatures() ) { | |
if( $feature->primary_tag eq 'source' ) { | |
for my $tag ( $feature->get_all_tags ) { | |
next if( $tag =~ /PCR_primers/ ); | |
print join("\t", '',$tag, | |
join(";", $feature->get_tag_values($tag))),"\n"; | |
} | |
} | |
} | |
last if $debug; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment