Skip to content

Instantly share code, notes, and snippets.

@avrilcoghlan
Created March 1, 2013 14:35
Show Gist options
  • Save avrilcoghlan/5065041 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/5065041 to your computer and use it in GitHub Desktop.
Perl script that retrieves all trees from the TreeFam database, and stores them in a Perl pickle
#!/usr/local/bin/perl
#
# Perl script store_treefam_trees.pl
# Written by Avril Coghlan (a.coghlan@ucc.ie)
# 3-Apr-09.
#
# For the TreeFam project.
#
# This perl script connects to the TreeFam database and stores
# the trees in a pickle.
#
# The command-line format is:
# % perl <store_treefam_trees.pl> version
# where version is the version of the TreeFam database to use.
#
#------------------------------------------------------------------#
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS:
$num_args = $#ARGV + 1;
if ($num_args != 1)
{
print "Usage of store_treefam_trees.pl\n\n";
print "perl store_treefam_trees.pl <version>\n";
print "where <version> is the version of the TreeFam database to use.\n";
print "For example, >perl -w store_treefam_trees.pl 7\n";
exit;
}
# FIND THE RELEASE OF TREEFAM TO USE:
$version = $ARGV[0];
# READ IN MY PERL MODULES:
use Avril_modules;
use Treefam::DBConnection;
use DBI;
use Storable;
$VERBOSE = 0;
#------------------------------------------------------------------#
# GET A LIST OF ALL FAMILIES:
$cnt = 0;
$database = "dbi:mysql:treefam_".$version.":db.treefam.org:3308";
$dbh = DBI->connect("$database", 'anonymous', '') || return;
$table_w = 'familyA';
$st = "SELECT AC from $table_w";
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n";
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr";
if ($rv >= 1)
{
while ((@array) = $sth->fetchrow_array)
{
$cnt++;
$AC = $array[0];
@families = (@families,$AC);
print "$cnt: Got $AC\n";
}
}
print "Read in list of TreeFam-A families...\n";
# GET A LIST OF ALL TREEFAM-B FAMILIES:
$dbh = DBI->connect("$database", 'anonymous', '') || return;
$table_w = 'familyB';
$st = "SELECT AC from $table_w";
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n";
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr";
if ($rv >= 1)
{
while ((@array) = $sth->fetchrow_array)
{
$cnt++;
$AC = $array[0];
@families = (@families,$AC);
print "$cnt: Got $AC\n";
}
}
$rc = $dbh->disconnect();
$rc = "";
print "Read in list of TreeFam-B families...\n";
# STORE THE TREES IN A PICKLE FILE:
%CLEAN = ();
%FULL = ();
$dbc = Treefam::DBConnection->new();
for ($i = 0; $i <= $#families; $i++)
{
$treefam_family = $families[$i];
print "$i: looking at $treefam_family\n";
if ($treefam_family eq '') { print STDERR "ERROR: treefam_family $treefam_family\n"; exit;}
$famh = $dbc->get_FamilyHandle();
if (!(defined($famh->get_by_id($treefam_family))))
{
print "WARNING: there is no family $treefam_family...\n";
goto NEXT_FAMILY;
}
$family = $famh->get_by_id($treefam_family);
if (!(defined($family->ID())))
{
print "WARNING: do not have ID stored for family $treefam_family...\n";
goto NEXT_FAMILY;
}
$AC = $family->ID(); # GET THE FAMILY ID.
if ($AC ne $treefam_family) { print STDERR "ERROR: AC $AC treefam_family $treefam_family\n"; exit;}
# GET THE CLEAN TREE FOR THE FAMILY:
if (!(defined($family->get_tree('clean'))))
{
print "WARNING: AC $AC: there is no clean tree!\n";
}
else
{
$cleantree = $family->get_tree('clean'); # GET THE TREEFAM CLEAN TREE.
if (!(defined($cleantree->nhx()))) { print STDERR "ERROR: AC $AC: there is no clean tree\n"; }
else
{
$cleantree = $cleantree->nhx();
if ($CLEAN{$treefam_family}) { print STDERR "ERROR: already have clean tree for $treefam_family\n"; exit;}
$CLEAN{$treefam_family} = $cleantree;
}
}
# GET THE FULL TREE FOR THE FAMILY:
if (!(defined($family->get_tree('full'))))
{
print "WARNING: AC $AC: there is no full tree!\n";
}
else
{
$fulltree = $family->get_tree('full');
if (!(defined($fulltree->nhx()))) { print STDERR "ERROR: AC $AC: there is no full tree\n"; }
else
{
$fulltree = $fulltree->nhx();
if ($FULL{$treefam_family}) { print STDERR "ERROR: already have full tree for $treefam_family\n"; exit;}
$FULL{$treefam_family} = $fulltree;
}
}
NEXT_FAMILY:
}
print STDERR "Read in trees...\n";
# STORE THE HASH TABLES %CLEAN AND %FULL IN A PICKLE:
$output1 = "treefam.".$version."_cleantrees";
store \%CLEAN,$output1;
$output2 = "treefam.".$version."_fulltrees";
store \%FULL,$output2;
#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment