Skip to content

Instantly share code, notes, and snippets.

@avrilcoghlan
Created March 1, 2013 14:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avrilcoghlan/5065063 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/5065063 to your computer and use it in GitHub Desktop.
Perl script that finds TreeFam families that are lacking a tree in the 'trees' table of the TreeFam mysql database.
#!/usr/local/bin/perl
#
# Perl script treefam_QC2.pl
# Written by Avril Coghlan (a.coghlan@ucc.ie)
# 3-Feb-09.
#
# This perl script finds TreeFam families that are lacking a tree in the
# 'trees' table.
#
# The command-line format is:
# % perl treefam_QC2.pl <release>
# where <release> is the release of the TreeFam database to use.
#
#
#------------------------------------------------------------------#
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS:
my $num_args = scalar @ARGV;
if ($num_args != 1)
{
    print "Usage of treefam_QC2.pl\n\n";
    print "perl treefam_QC2.pl <release>\n";
    print "where <release> is the release of the TreeFam database to use.\n";
    print "For example, >perl -w treefam_QC2.pl 7\n";
    exit;
}
#------------------------------------------------------------------#
# LOAD THE DATABASE INTERFACE MODULE:
use DBI;
# FIND WHICH RELEASE OF THE TREEFAM DATABASE TO USE:
my $release = $ARGV[0];
#------------------------------------------------------------------#
# READ IN A LIST OF ALL THE FAMILIES FOR WHICH THERE ARE TREES
# IN TREEFAM:
my $database = "dbi:mysql:treefam_".$release.":db.treefam.org:3308";
# NOTE: 'return' IS NOT ALLOWED OUTSIDE A SUBROUTINE, SO A FAILED
# CONNECTION MUST BE REPORTED WITH 'die'. DBI->connect RETURNS A FALSE
# VALUE ON FAILURE AND LEAVES THE REASON IN $DBI::errstr.
my $dbh = DBI->connect($database, 'anonymous', '')
    or die "ERROR: cannot connect to $database: $DBI::errstr\n";
my $TREES = read_families_with_trees($dbh);
# NOW READ IN ALL THE FAMILIES IN THIS RELEASE OF TREEFAM, AND CHECK
# THAT EACH FAMILY HAS A SEED, CLEAN AND FULL TREE:
check_that_each_family_has_trees($dbh, $TREES);
# A FAILED DISCONNECT IS NOT FATAL: ALL THE CHECKS HAVE ALREADY RUN.
$dbh->disconnect()
    or warn "WARNING: problem disconnecting from $database: $DBI::errstr\n";
#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
#------------------------------------------------------------------#
# READ IN THE FAMILIES FOR WHICH THERE ARE TREES IN TREEFAM.
#
# Queries the 'trees' table for (AC, TYPE, TREE) rows and records every
# (family, tree type) pair whose TREE value contains a '(' character,
# i.e. is not empty.
#
# Arguments:
#   $_[0]  database handle; only prepare() and errstr() are called on it,
#          and execute(), fetchrow_array() and errstr() on the statement
#          handle that prepare() returns.
# Returns:
#   a reference to a hash whose keys are "<AC>=<TYPE>" and whose values
#   are 1.
# Side effects:
#   prints a WARNING line to the default output handle when the same
#   AC/TYPE pair is seen more than once, and a progress line to STDERR.
# Dies:
#   if the statement cannot be prepared or executed.
sub read_families_with_trees
{
    my ($dbh) = @_;
    my %TREES = ();

    # THE TREES CAN HAVE TYPE: SEED/FULL/CLEAN
    my $table_w = 'trees';
    my $st = "SELECT AC, TYPE, TREE from $table_w";

    # Method calls are not interpolated inside double-quoted strings, so
    # the error text has to be concatenated explicitly.
    my $sth = $dbh->prepare($st)
        or die "Cannot prepare $st: " . $dbh->errstr . "\n";
    $sth->execute
        or die "Cannot execute the query: " . $sth->errstr;

    # The value returned by execute() is not a reliable row count for a
    # SELECT, so simply fetch until the statement handle is exhausted.
    while (my ($AC, $TYPE, $TREE) = $sth->fetchrow_array)
    {
        # IF THE TREE IS EMPTY (OR NULL), IGNORE IT:
        next unless (defined($TREE) && $TREE =~ /\(/);

        my $key = $AC."=".$TYPE;
        if ($TREES{$key})
        {
            print "WARNING: Already have $TYPE tree for $AC in table $table_w\n";
        }
        $TREES{$key} = 1;
    }

    print STDERR "Read in families with trees\n";
    return(\%TREES);
}
#------------------------------------------------------------------#
# NOW READ IN ALL THE FAMILIES IN THIS RELEASE OF TREEFAM, AND CHECK
# THAT EACH FAMILY HAS A SEED, CLEAN AND FULL TREE.
#
# Reads the AC column of the familyA, familyB and familyC tables (in that
# order) and prints a WARNING line for every expected tree type that is
# missing from $TREES. Afterwards prints a WARNING line for every entry of
# $TREES whose family was not found in any of the three tables.
#
# Arguments:
#   $_[0]  database handle; only prepare() and errstr() are called on it,
#          and execute(), fetchrow_array() and errstr() on the statement
#          handle that prepare() returns.
#   $_[1]  reference to a hash keyed by "<AC>=<TYPE>" with true values for
#          the trees that exist (as built by read_families_with_trees).
# Returns:
#   nothing useful; all results are printed to the default output handle.
# Dies:
#   if a statement cannot be prepared or executed.
sub check_that_each_family_has_trees
{
    my ($dbh, $TREES) = @_;
    my %SEEN = ();

    foreach my $table_w ('familyA', 'familyB', 'familyC')
    {
        my $st = "SELECT AC from $table_w";

        # Method calls are not interpolated inside double-quoted strings,
        # so the error text has to be concatenated explicitly.
        my $sth = $dbh->prepare($st)
            or die "Cannot prepare $st: " . $dbh->errstr . "\n";
        $sth->execute
            or die "Cannot execute the query: " . $sth->errstr;

        # The value returned by execute() is not a reliable row count for
        # a SELECT, so simply fetch until the handle is exhausted.
        # $AC is lexical here so that it cannot leak into a package global.
        while (my ($AC) = $sth->fetchrow_array)
        {
            $SEEN{$AC} = 1;

            # CHECK THAT THIS FAMILY HAS A SEED, CLEAN AND FULL TREE:
            # NOTE THAT A TF3XXXXXX FAMILY SHOULDN'T HAVE SEED TREE,
            # SO SEED IS ONLY REQUIRED FOR TF1XXXXXX FAMILIES:
            my @required = ('CLEAN', 'FULL');
            if (substr($AC,0,3) eq 'TF1')
            {
                unshift(@required, 'SEED');
            }
            foreach my $type (@required)
            {
                next if ($TREES->{$AC."=".$type});
                print "WARNING: there is no $type tree for $AC in table trees\n";
            }
        }
    }

    # CHECK THAT THERE ARE NO TREES IN THE trees TABLES FOR FAMILIES
    # THAT DO NOT APPEAR IN THE familyA, familyB or familyC TABLES.
    # THE KEYS ARE SORTED SO THAT THE WARNINGS COME OUT IN A STABLE ORDER:
    foreach my $key (sort keys %{$TREES})
    {
        # THE KEY IS "<AC>=<TYPE>"; ONLY THE FAMILY PART IS NEEDED:
        my ($AC) = split(/=/, $key);
        if (!($SEEN{$AC}))
        {
            print "WARNING: $AC appears in the trees table but not in familyA/familyB/familyC tables\n";
        }
    }
    return;
}
#------------------------------------------------------------------#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment