Skip to content

Instantly share code, notes, and snippets.

@avrilcoghlan
Created March 1, 2013 14:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avrilcoghlan/5065002 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/5065002 to your computer and use it in GitHub Desktop.
Perl script that retrieves orthology bootstrap values for ortholog pairs for a particular pair of species from the TreeFam database, and stores the orthology bootstrap values in a Perl pickle
#!/usr/local/bin/perl
#
# Perl script store_treefam_orthostrap.pl
# Written by Avril Coghlan (a.coghlan@ucc.ie)
# 4-Apr-09.
#
# For the TreeFam project.
#
# This perl script connects to the TreeFam database and stores the orthology
# bootstrap values for orthologs for a particular pair of species in a pickle.
#
# The command-line format is:
# % perl <store_treefam_orthostrap.pl> version species1 species2
# where version is the version of the TreeFam database to use,
# species1 is the first species of interest,
# species2 is the second species of interest.
#
#------------------------------------------------------------------#
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS:
$num_args = $#ARGV + 1;
if ($num_args != 3)
{
print "Usage of store_treefam_orthostrap.pl\n\n";
print "perl store_treefam_orthostrap.pl <version> <species1> <species2>\n";
print "where <version> is the version of the TreeFam database to use,\n";
print " <species1> is the first species of interest,\n";
print " <species2> is the second species of interest.\n";
print "For example, >perl -w store_treefam_orthostrap.pl 7 BRUMA CAEEL\n";
exit;
}
# FIND THE RELEASE OF TREEFAM TO USE:
$version = $ARGV[0];
# FIND THE FIRST SPECIES OF INTEREST:
$species1 = $ARGV[1];
# FIND THE SECOND SPECIES OF INTEREST:
$species2 = $ARGV[2];
# READ IN MY PERL MODULES:
use Avril_modules;
use Treefam::DBConnection;
use DBI;
use Storable;
$VERBOSE = 0;
#------------------------------------------------------------------#
# GET THE TAXIDS FOR THE SPECIES:
$file1 = "treefam.".$version."_taxid";
($TAXID) = retrieve($file1);
if (!($TAXID->{$species1})) { print STDERR "ERROR: do not know taxid for $species1\n"; exit;}
$taxid1 = $TAXID->{$species1};
if (!($TAXID->{$species2})) { print STDERR "ERROR: do not know taxid for $species2\n"; exit;}
$taxid2 = $TAXID->{$species2};
# GET THE TAXID FOR THEIR LAST COMMON ANCESTOR NODE:
$lca_taxid = &get_taxid_of_lca($taxid1,$taxid2);
#------------------------------------------------------------------#
# READ IN THE GENE IDs CORRESPONDING TO IDX IDs:
$file1 = "treefam.".$version."_idx".$species1;
($SPECIES1_ID) = retrieve($file1);
$file2 = "treefam.".$version."_idx".$species2;
($SPECIES2_ID) = retrieve($file2);
#------------------------------------------------------------------#
# READ IN ALL THE ORTHOLOGY BOOTSTRAP VALUES FROM THE 'orthologs' TABLE:
# NOW FIND ALL $species1-$species2 ORTHOLOGS:
$database = 'treefam_'.$version;
$dbh = DBI->connect("dbi:mysql:$database:db.treefam.org:3308", 'anonymous', '') || return;
$table_w = 'ortholog';
$cnt = 0;
# THE COLUMNS IN THE TABLE ARE IDX1, IDX2, ORTHOLOGY BOOTSTRAP:
%ORTHOSTRAP = ();
$st = "SELECT idx1, idx2, bootstrap from $table_w WHERE taxon_id=$lca_taxid";
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n";
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr";
if ($rv >= 1)
{
while ((@array) = $sth->fetchrow_array)
{
$IDX1 = $array[0];
$IDX2 = $array[1];
$orthostrap = $array[2];
$ID1 = "none";
$ID2 = "none";
if ($SPECIES1_ID->{$IDX1} && $SPECIES2_ID->{$IDX2})
{
$ID1 = $SPECIES1_ID->{$IDX1};
$ID2 = $SPECIES2_ID->{$IDX2};
}
elsif ($SPECIES1_ID->{$IDX2} && $SPECIES2_ID->{$IDX1})
{
$ID1 = $SPECIES1_ID->{$IDX2};
$ID2 = $SPECIES2_ID->{$IDX1};
}
if ($ID1 ne 'none' && $ID2 ne 'none')
{
$cnt++;
print "Storing orthostrap for $cnt: $ID1 and $ID2\n";
$pair1 = $ID1."_".$ID2;
$pair2 = $ID2."_".$ID1;
if ($ORTHOSTRAP{$pair1})
{
$the_orthostrap= $ORTHOSTRAP{$pair1};
if ($the_orthostrap eq 'zero') { $the_orthostrap = 0;}
# THIS SHOULDN'T REALLY HAPPEN BUT IN TREEFAM-4, HAD SEVERAL TRANSCRIPTS OF ONE GENE IN A TREE:
if ($the_orthostrap ne $orthostrap)
{
if ($orthostrap < $the_orthostrap)
{
$orthostrap = $the_orthostrap;
}
}
}
if ($ORTHOSTRAP{$pair2})
{
$the_orthostrap = $ORTHOSTRAP{$pair2};
if ($the_orthostrap eq 'zero') { $the_orthostrap = 0;}
# THIS SHOULDN'T REALLY HAPPEN BUT IN TREEFAM-4, HAD SEVERAL TRANSCRIPTS OF ONE GENE IN A TREE:
if ($the_orthostrap ne $orthostrap)
{
if ($orthostrap < $the_orthostrap)
{
$orthostrap = $the_orthostrap;
}
}
}
if ($orthostrap == 0) { $orthostrap = "zero";}
$ORTHOSTRAP{$pair1} = $orthostrap;
$ORTHOSTRAP{$pair2} = $orthostrap;
}
}
}
print STDERR "Read in orthology bootstrap values for $species1 and $species2 genes\n";
#------------------------------------------------------------------#
# STORE THE HASH TABLE %ORTHOSTRAP IN A PICKLE:
$output1 = "treefam.".$version."_orthostrap".$species1."_".$species2;
store \%ORTHOSTRAP,$output1;
#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
#------------------------------------------------------------------#
sub get_taxid_of_lca
{
my $taxid1 = $_[0];
my $taxid2 = $_[1];
my $taxid_lca = "none";
my $table_w;
my %ANCESTOR1 = ();
my $taxid;
my $ancestor;
my $st;
my $sth;
my $rv;
my @array;
$database = 'treefam_'.$version;
$dbh = DBI->connect("dbi:mysql:$database:db.treefam.org:3308", 'anonymous', '') || return;
$table_w = 'spec_nodes';
$taxid = $taxid1;
LOOP_AGAIN1:
# FIND THE ANCESTORS OF $taxid:
$st = "SELECT parent_id from $table_w WHERE taxon_id=$taxid";
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n";
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr";
if ($rv >= 1)
{
while ((@array) = $sth->fetchrow_array)
{
$ancestor = $array[0];
$ANCESTOR1{$ancestor} = 1;
$taxid = $ancestor;
goto LOOP_AGAIN1;
}
}
$taxid = $taxid2;
LOOP_AGAIN2:
# FIND THE ANCESTORS OF $taxid:
$st = "SELECT parent_id from $table_w WHERE taxon_id=$taxid";
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n";
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr";
if ($rv >= 1)
{
while ((@array) = $sth->fetchrow_array)
{
$ancestor = $array[0];
if ($ANCESTOR1{$ancestor}) { $taxid_lca = $ancestor; goto FOUND_LCA;}
$taxid = $ancestor;
goto LOOP_AGAIN2;
}
}
FOUND_LCA:
if ($taxid_lca eq 'none') { print STDERR "ERROR: taxid1 $taxid1 taxid2 $taxid2 taxid_lca $taxid_lca\n"; exit;}
return($taxid_lca);
}
#------------------------------------------------------------------#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment