Skip to content

Instantly share code, notes, and snippets.

@avrilcoghlan
Created March 1, 2013 14:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avrilcoghlan/5065181 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/5065181 to your computer and use it in GitHub Desktop.
Perl script that checks for cases where a TreeFam family seems to have disappeared from a particular version of TreeFam, even though it was present in the previous version of TreeFam and has not been curated since.
#!/usr/local/bin/perl
#
# Perl script treefam_QC12.pl
# Written by Avril Coghlan (a.coghlan@ucc.ie)
# 6-Feb-09.
#
# This perl script checks for cases where a TreeFam family seems to have
# disappeared from a particular version of TreeFam, even though it was
# present in the previous version of TreeFam and has not been curated since.
#
# The command-line format is:
# % perl <treefam_QC12.pl> <release>
# where <release> is the release of the TreeFam database to use.
#
#
#------------------------------------------------------------------#
# MAKE SURE EXACTLY ONE COMMAND-LINE ARGUMENT (THE RELEASE) WAS GIVEN;
# IF NOT, PRINT THE USAGE MESSAGE AND STOP:
$num_args = scalar(@ARGV);
unless ($num_args == 1)
{
    # The heredoc is single-quoted, so the text is printed exactly as written.
    print <<'END_USAGE';
Usage of treefam_QC12.pl

perl treefam_QC12.pl <release>
where <release> is the release of the TreeFam database to use.
For example, >perl -w treefam_QC12.pl 7
END_USAGE
    exit;
}
#------------------------------------------------------------------#
# LOAD THE DBI MODULE, WHICH IS USED TO CONNECT TO THE TREEFAM MYSQL DATABASE:
use DBI;
# FIND WHICH RELEASE OF THE TREEFAM DATABASE TO USE.
# NOTE: $release and $prev_release are deliberately kept as package
# variables, because the subroutines further down read them when they
# build their messages.
$release = $ARGV[0];
# The release is pasted into the database name and decremented to get the
# previous release, so it has to be a whole number:
if (!defined($release) || $release !~ /\A\d+\z/)
{
    die "ERROR: <release> must be a whole number (for example 7)\n";
}
#------------------------------------------------------------------#
# FIND ALL FAMILIES IN THE FAMILYA/FAMILYB/FAMILYC TABLES, IN
# THE CURRENT TREEFAM RELEASE:
$database = "dbi:mysql:treefam_".$release.":db.treefam.org:3308";
# A failed connection must stop the script with a clear message ('return'
# is not allowed here, as this code is not inside a subroutine):
$dbh = DBI->connect($database, 'anonymous', '')
    or die "Cannot connect to $database: $DBI::errstr\n";
$RELEASE = read_all_families1($dbh);
# READ IN THE FAMILYB FAMILIES WHICH WERE TURNED INTO FAMILYA FAMILIES
# SINCE THE PREVIOUS RELEASE:
$CURATED = read_curated_families($dbh);
# NOW DISCONNECT FROM THE DATABASE:
$dbh->disconnect();
# FIND ALL FAMILIES IN THE FAMILYA/FAMILYB/FAMILYC TABLES, IN
# THE PREVIOUS TREEFAM RELEASE, AND REPORT THOSE THAT HAVE DISAPPEARED:
$prev_release = $release - 1;
$database = "dbi:mysql:treefam_".$prev_release.":db.treefam.org:3308";
$dbh = DBI->connect($database, 'anonymous', '')
    or die "Cannot connect to $database: $DBI::errstr\n";
read_all_families2($dbh, $RELEASE, $CURATED);
# NOW DISCONNECT FROM THE DATABASE:
$dbh->disconnect();
#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
print "FINISHED\n";
#------------------------------------------------------------------#
# READ IN THE FAMILYB FAMILIES WHICH WERE TURNED INTO FAMILYA FAMILIES
# SINCE THE PREVIOUS RELEASE.
#
# Argument: $dbh - a connected database handle.
# Returns:  a reference to a hash that maps each famB accession in the
#           famB2A table (eg. TF300003) to its famA accession
#           (eg. TF101534).
# Dies if the query cannot be prepared or executed.
sub read_curated_families
{
    my ($dbh) = @_;
    my %CURATED = ();

    my $st  = "SELECT famB, famA from famB2A";
    # Method calls are not interpolated inside double quotes, so the error
    # text has to be concatenated onto the message:
    my $sth = $dbh->prepare($st)
        or die "Cannot prepare $st: " . $dbh->errstr . "\n";
    $sth->execute
        or die "Cannot execute the query: " . $sth->errstr;

    # Do not test the value returned by execute(): for a SELECT it can be
    # -1 (row count unknown) even when there are rows to fetch. An empty
    # result simply ends the loop straight away.
    while (my ($AC1, $AC2) = $sth->fetchrow_array)
    {
        $CURATED{$AC1} = $AC2; # eg. TF300003 => TF101534
    }
    return(\%CURATED);
}
#------------------------------------------------------------------#
# READ ALL FAMILIES IN THE CURRENT RELEASE OF TREEFAM.
#
# Argument: $dbh - a connected database handle.
# Returns:  a reference to a hash whose keys are all the accessions (AC)
#           found in the familyA, familyB and familyC tables; every
#           value is 1.
# Dies if a query cannot be prepared or executed.
# Reads the package variable $release for the progress message.
sub read_all_families1
{
    my ($dbh) = @_;
    my %SEEN = ();

    for my $table_w ('familyA', 'familyB', 'familyC')
    {
        my $st  = "SELECT AC from $table_w";
        # Method calls are not interpolated inside double quotes, so the
        # error text has to be concatenated onto the message:
        my $sth = $dbh->prepare($st)
            or die "Cannot prepare $st: " . $dbh->errstr . "\n";
        $sth->execute
            or die "Cannot execute the query: " . $sth->errstr;

        # Do not test the value returned by execute(): for a SELECT it
        # can be -1 (row count unknown) even when there are rows to fetch.
        while (my ($AC) = $sth->fetchrow_array)
        {
            $SEEN{$AC} = 1;
        }
    }
    print "Read all families in the TreeFam version $release\n";
    print STDERR "Read all families in the TreeFam version $release\n";
    return(\%SEEN);
}
#------------------------------------------------------------------#
# READ ALL FAMILIES IN THE PREVIOUS RELEASE OF TREEFAM, AND PRINT A
# WARNING FOR EACH ONE THAT IS MISSING FROM THE CURRENT RELEASE.
#
# Arguments: $dbh     - a connected database handle (the caller passes
#                       the handle for the previous release).
#            $RELEASE - reference to a hash whose keys are the accessions
#                       present in the current release.
#            $CURATED - reference to a hash mapping a family accession to
#                       the accession it was curated into.
# A family missing from $RELEASE is reported unless it was curated into a
# family that is in $RELEASE.
# Dies if a query cannot be prepared or executed.
# Reads the package variables $release and $prev_release for its messages.
# Returns nothing useful.
sub read_all_families2
{
    my ($dbh, $RELEASE, $CURATED) = @_;

    for my $table_w ('familyA', 'familyB', 'familyC')
    {
        my $st  = "SELECT AC from $table_w";
        # Method calls are not interpolated inside double quotes, so the
        # error text has to be concatenated onto the message:
        my $sth = $dbh->prepare($st)
            or die "Cannot prepare $st: " . $dbh->errstr . "\n";
        $sth->execute
            or die "Cannot execute the query: " . $sth->errstr;

        # Do not test the value returned by execute(): for a SELECT it
        # can be -1 (row count unknown) even when there are rows to fetch.
        while (my ($AC) = $sth->fetchrow_array)
        {
            # NOTHING TO REPORT IF THIS FAMILY IS IN THE CURRENT RELEASE:
            next if $RELEASE->{$AC};

            # CHECK IF THE FAMILY $AC WAS CURATED:
            my $curated = $CURATED->{$AC};
            if ($curated)
            {
                if (!$RELEASE->{$curated})
                {
                    print "WARNING: family $AC was curated to $curated which is not in TreeFam release $release, even though $AC was in version $prev_release\n";
                }
            }
            else
            {
                print "WARNING: family $AC is not in TreeFam version $release, but was in version $prev_release (and was not curated since)\n";
            }
        }
    }
    print STDERR "Read all families in TreeFam version $prev_release\n";
    print "Read all families in TreeFam version $prev_release\n";
    return;
}
#------------------------------------------------------------------#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment