Skip to content

Instantly share code, notes, and snippets.

@awitney
Created October 4, 2013 15:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save awitney/6827524 to your computer and use it in GitHub Desktop.
Save awitney/6827524 to your computer and use it in GitHub Desktop.
fetching genes from ensembl
#!/usr/bin/env perl
use strict;
use warnings;
use Bio::EnsEMBL::LookUp;
use Bio::SeqIO;
use Bio::Seq;
# Fetch every gene with a given external name from one or more Ensembl
# Bacteria genomes and write the sequences to STDOUT in FASTA format.
# Progress and diagnostics go to STDERR so STDOUT stays clean FASTA.
#
# Positional arguments (all optional):
#   1. gene name passed to fetch_all_by_external_name   (default 'gyrA')
#   2. genome name; if it contains '*' it is passed to
#      get_all_by_name_pattern, otherwise to get_by_name_exact
#                                        (default 'pseudomonas_aeruginosa*')
#   3. sequence type: 'dna' writes the exon sequence, any other value
#      writes the translated transcript                 (default 'dna')
my $string = $ARGV[0] || 'gyrA';
my $strain = $ARGV[1] || 'pseudomonas_aeruginosa*';
my $type   = $ARGV[2] || 'dna';

my $seqout = Bio::SeqIO->new( -format => 'Fasta', -fh => \*STDOUT );

# load the lookup from the main Ensembl Bacteria public server
my $lookup = Bio::EnsEMBL::LookUp->new(
    -URL      => "http://bacteria.ensembl.org/registry.json",
    -NO_CACHE => 1
);

#my @dbas = @{$lookup->get_all_by_name_pattern('escherichia_coli_%')};
my @dbas;
if ( $strain =~ m/\*/ ) {
    @dbas = @{ $lookup->get_all_by_name_pattern($strain) };
}
else {
    # NOTE(review): get_by_name_exact appears to return a single adaptor
    # (or undef when nothing matches) rather than an array reference --
    # confirm against the installed LookUp version. Accept either shape so
    # the exact-name path does not die on dereference.
    my $found = $lookup->get_by_name_exact($strain);
    @dbas = ref $found eq 'ARRAY' ? @{$found}
          : defined $found        ? ($found)
          :                         ();
}
print STDERR "Found: ".@dbas." adaptors\n";

foreach my $dba (@dbas) {
    my $genes = $dba->get_GeneAdaptor()->fetch_all_by_external_name($string);
    print STDERR "\tFound ".@{$genes}." genes for ".$dba->species()."\n";

    foreach my $gene ( @{$genes} ) {
        my $gene_description = $gene->description ? $gene->description : '';
        print STDERR "\t\t".$gene->stable_id." [".$gene->display_id."] [".$gene->external_name."] -- ".$gene_description."\n";

        # FASTA description line: gene description followed by the species.
        my $description = $gene_description . ' ['.$dba->species().']';

        # The script only handles single-exon, single-transcript genes;
        # anything else is treated as a fatal error.
        my $exons = $gene->get_all_Exons();
        die "\nError: more than one exon\n" if @{$exons} > 1;
        my $transcripts = $gene->get_all_Transcripts();
        die "\nError: more than one transcript\n" if @{$transcripts} > 1;

        my $seq_string;
        if ( $type eq 'dna' ) {
            my $exon = $exons->[0];
            if ( !$exon ) {
                # No exon to read a sequence from: skip instead of
                # calling a method on undef.
                print STDERR "\t\tSkipping ".$gene->stable_id.": no exon found\n";
                next;
            }
            $seq_string = $exon->seq->seq();
        }
        else {
            my $transcript  = $transcripts->[0];
            my $translation = $transcript ? $transcript->translate() : undef;
            if ( !$translation ) {
                # No transcript, or the transcript has no translation.
                print STDERR "\t\tSkipping ".$gene->stable_id.": no translation available\n";
                next;
            }
            $seq_string = $translation->seq();
        }

        my $seq = Bio::Seq->new(
            -display_id  => $gene->stable_id,
            -seq         => $seq_string,
            -description => $description
        );
        $seqout->write_seq($seq);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment