Skip to content

Instantly share code, notes, and snippets.

@cjfields
Forked from anonymous/entrenzgeneid
Created March 8, 2012 18:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cjfields/2002399 to your computer and use it in GitHub Desktop.
Save cjfields/2002399 to your computer and use it in GitHub Desktop.
EntrezGeneID get sequence
#!/usr/bin/perl
# Retrieve GenBank records for Entrez Gene IDs, including flanking sequence.
#
# Usage: <script> gene_id [gene_id ...]
#
# For every gene found, the script reads the genomic accession (ChrAccVer)
# and the ChrStart/ChrStop coordinates from the gene document summary, then
# fetches that region from the 'nucleotide' database, widened by $FLANK bases
# on each side, and prints the raw response to STDOUT.
use strict;
use warnings;
use Bio::DB::EUtilities;

# Number of flanking bases requested on either side of the gene.
my $FLANK = 5000;

# this needs to be a list of EntrezGene unique IDs
my @ids = @ARGV;
die "Usage: $0 gene_id [gene_id ...]\n" unless @ids;

my $eutil = Bio::DB::EUtilities->new(-eutil => 'esearch',
                                     -email => 'mymail@foo.bar',
                                     -db    => 'gene',
                                     -term  => join(',', @ids));
my @gene_ids = $eutil->get_ids;

# An esummary request without any IDs cannot succeed, so stop early.
die "No Entrez Gene records found for: @ids\n" unless @gene_ids;

$eutil = Bio::DB::EUtilities->new(-eutil => 'esummary',
                                  -email => 'mymail@foo.bar',
                                  -db    => 'gene',
                                  -id    => \@gene_ids);

my $fetcher = Bio::DB::EUtilities->new(-eutil   => 'efetch',
                                       -email   => 'mymail@foo.bar',
                                       -db      => 'nucleotide',
                                       -rettype => 'gb');

while (my $docsum = $eutil->next_DocSum) {
    # to ensure we grab the right ChrStart information, we grab the Item above
    # it in the Item hierarchy (visible via print_all from the eutil instance)
    my ($item) = $docsum->get_Items_by_name('GenomicInfoType');
    die "GenomicInfoType not found in document summary" unless $item;

    # undef (rather than 0) marks "not seen yet", so that a genuine
    # coordinate of 0 is not mistaken for a missing value.
    my %item_data = map { $_ => undef } qw(ChrAccVer ChrStart ChrStop);
    while (my $sub_item = $item->next_subItem) {
        my $name = $sub_item->get_name;
        next unless exists $item_data{$name};
        $item_data{$name} = $sub_item->get_content;
    }

    # check to make sure everything is set
    for my $check (qw(ChrAccVer ChrStart ChrStop)) {
        die "$check not set"
            unless defined $item_data{$check} && length $item_data{$check};
    }

    # ChrStart greater than ChrStop is treated as a minus-strand gene.
    my $strand = $item_data{ChrStart} > $item_data{ChrStop} ? 2 : 1;
    printf("Retrieving %s, from %d-%d, strand %d\n", $item_data{ChrAccVer},
                                                      $item_data{ChrStart},
                                                      $item_data{ChrStop},
                                                      $strand
          );

    # Order the coordinates before adding the flanks. Subtracting from
    # ChrStart and adding to ChrStop would shrink the region whenever
    # ChrStart > ChrStop; orientation is conveyed by -strand instead.
    my ($low, $high) = $strand == 2
        ? ($item_data{ChrStop},  $item_data{ChrStart})
        : ($item_data{ChrStart}, $item_data{ChrStop});

    # efetch positions start at 1, so a gene closer than $FLANK bases to the
    # beginning of the sequence must not produce a zero or negative start.
    # NOTE(review): the summary coordinates may be 0-based while efetch is
    # 1-based; the one-base shift is not corrected here - confirm if exact
    # boundaries matter.
    my $seq_start = $low - $FLANK;
    $seq_start = 1 if $seq_start < 1;
    my $seq_stop = $high + $FLANK;

    $fetcher->set_parameters(-id        => $item_data{ChrAccVer},
                             -seq_start => $seq_start,
                             -seq_stop  => $seq_stop,
                             -strand    => $strand);
    print $fetcher->get_Response->content;
}
#!/usr/bin/perl
use strict;
use warnings;
use Bio::DB::EUtilities;

# Look up the command-line arguments in the Entrez 'gene' database and print
# the IDs returned by the search as one comma-separated string.
# The arguments need to be a list of EntrezGene unique IDs.
my $search_term = join(',', @ARGV);

# Kept as an ordered list so the parameters reach the constructor in the
# same order as before.
my @search_args = (
    -eutil => 'esearch',
    -email => 'mymail@foo.bar',
    -db    => 'gene',
    -term  => $search_term,
);
my $searcher = Bio::DB::EUtilities->new(@search_args);

my @found_ids = $searcher->get_ids;
print join(",", @found_ids);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment