Skip to content

Instantly share code, notes, and snippets.

@andrewyatz
Created September 23, 2011 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save andrewyatz/1237394 to your computer and use it in GitHub Desktop.
Save andrewyatz/1237394 to your computer and use it in GitHub Desktop.
An example of extracting UTR coordinates from an Ensembl transcript
use strict;
use warnings;

use Bio::EnsEMBL::Registry;
# Populate the Ensembl registry from the public database server, logging in
# as the anonymous user. -DB_VERSION selects which release's databases are
# registered, so results are reproducible against that release.
Bio::EnsEMBL::Registry->load_registry_from_db(
    -HOST       => 'ensembldb.ensembl.org',
    -PORT       => 5306,
    -USER       => 'anonymous',
    -VERBOSE    => 0,
    -DB_VERSION => 64,
);

# Fail with a readable message if the human core database was not registered,
# instead of dying later with "Can't call method ... on an undefined value".
my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor('human', 'core')
    or die "Could not obtain the human core DBAdaptor from the Ensembl registry\n";
my $ta = $dba->get_TranscriptAdaptor();

# Stable IDs of the transcripts whose UTR coordinates will be reported.
my @ids = qw/ENST00000330803 ENST00000366472 ENST00000318244/;
# For every requested transcript, print one line giving the genomic span of
# its 5' UTR and its 3' UTR. A UTR that does not exist is printed as 0-0.
#
# Ranges are printed in transcript orientation: on the forward strand the
# first number is the smaller coordinate, on the reverse strand it is the
# larger one.
#
# NOTE(review): each UTR is reported as a single start-end span between the
# transcript boundary and the coding boundary; if a UTR is split across
# several exons the span presumably includes the intervening introns.
foreach my $id (@ids) {
    my $t = $ta->fetch_by_stable_id($id);
    if (!defined $t) {
        warn "Transcript $id could not be fetched; skipping\n";
        next;
    }

    # Transcripts without a coding region have no cDNA coding start/end to
    # map, so there is no UTR boundary to compute for them.
    my $cdna_coding_start = $t->cdna_coding_start();
    my $cdna_coding_end   = $t->cdna_coding_end();
    if (!defined $cdna_coding_start || !defined $cdna_coding_end) {
        warn "Transcript $id has no coding region; skipping\n";
        next;
    }

    my $strand    = $t->strand();
    my $seq_start = $t->seq_region_start();
    my $seq_end   = $t->seq_region_end();

    # Map the first and last coding bases from cDNA to genomic coordinates.
    # A single-base range is mapped, so only the first returned coordinate
    # object is needed.
    my ($five_prime_coordinate)
        = $t->cdna2genomic($cdna_coding_start, $cdna_coding_start);
    my ($three_prime_coordinate)
        = $t->cdna2genomic($cdna_coding_end, $cdna_coding_end);
    my $coding_first = $five_prime_coordinate->start();
    my $coding_last  = $three_prime_coordinate->start();

    my ($five_start, $five_end, $three_start, $three_end) = (0) x 4;

    if ($strand == 1) {
        # Forward strand: the transcript's 5' end is seq_region_start.
        if ($seq_start != $coding_first) {
            $five_start = $seq_start;
            $five_end   = $coding_first - 1;
        }
        if ($seq_end != $coding_last) {
            $three_start = $coding_last + 1;
            $three_end   = $seq_end;
        }
    }
    else {
        # Reverse strand: the transcript's 5' end is seq_region_end and its
        # 3' end is seq_region_start, so the "is there a UTR?" tests must be
        # made against the opposite boundary compared to the forward strand.
        if ($seq_end != $coding_first) {
            $five_start = $seq_end;
            $five_end   = $coding_first + 1;
        }
        if ($seq_start != $coding_last) {
            $three_start = $coding_last - 1;
            $three_end   = $seq_start;
        }
    }

    printf(q{5' UTR %d-%d | 3' UTR %d-%d}, $five_start, $five_end, $three_start, $three_end);
    print "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment