Skip to content

Instantly share code, notes, and snippets.

@avrilcoghlan
Created March 1, 2013 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avrilcoghlan/5064852 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/5064852 to your computer and use it in GitHub Desktop.
Perl script that finds TreeFam families that have Caenorhabditis elegans and Brugia malayi genes, and prints out the number of genes from each species in the trees for each of those families
#!/usr/local/bin/perl
#
# Perl script find_treefam_with_Ce_Bm.pl
# Written by Avril Coghlan (a.coghlan@ucc.ie)
# 17-Apr-09
#
# This perl script finds TreeFam families that have Ce and Bm genes, and prints out
# how many genes are in the tree.
#
# The command-line format is:
# % perl <find_treefam_with_Ce_Bm.pl> <release>
# where <release> is the release of the TreeFam database to use.
#
#
#------------------------------------------------------------------#
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS:
$num_args = $#ARGV + 1;
if ($num_args != 1)
{
print "Usage of find_treefam_with_Ce_Bm.pl\n\n";
print "perl find_treefam_with_Ce_Bm.pl <release>\n";
print "where <release> is the release of the TreeFam database to use.\n";
print "For example, >perl -w find_treefam_with_Ce_Bm.pl 7\n";
exit;
}
#------------------------------------------------------------------#
# DECLARE MYSQL USERNAME AND HOST:
use DBI;
use lib "/home/bcri/acoghlan/perlmodulesAvril/"; # edit to point to the TreeFam perl api xxx
use Treefam::DBConnection;
# FIND WHICH RELEASE OF THE TREEFAM DATABASE TO USE:
$release = $ARGV[0];
#------------------------------------------------------------------#
# FIND ALL FAMILIES IN THE FAMILYA/FAMILYB/FAMILYC TABLES, IN
# THE CURRENT TREEFAM RELEASE:
$database = "dbi:mysql:treefam_".$release.":db.treefam.org:3308";
$dbh = DBI->connect("$database", 'anonymous', '') || return;
$FAMILIES = &read_all_families1($dbh);
# CHECK IF THERE ARE CE AND BM GENES IN THE TREES FOR FAMILIES:
&find_Ce_Bm_genes($FAMILIES,$dbh);
# NOW DISCONNECT FROM THE DATABASE:
$rc = $dbh->disconnect();
$rc = "";
#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
print "FINISHED\n";
#------------------------------------------------------------------#
# READ IN THE TREES FOR FAMILIES, AND FIND CE AND BM GENES:
sub find_Ce_Bm_genes
{
my $FAMILIES = $_[0];
my $dbh = $_[1];
my $family;
my $table_w;
my $st;
my $sth;
my $rv;
my @array;
my $no_ce;
my $no_bm;
my $id;
foreach $family (keys %{$FAMILIES})
{
# GET THE TREE FOR THIS FAMILY:
$no_ce = 0;
$no_bm = 0;
$table_w = "fam_genes";
# THE ortholog TABLE SEEMS TO USE THE FULL TREE:
$st = "SELECT ID from $table_w WHERE AC=\'$family\' AND FLAG=\'FULL\'";
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n";
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr";
if ($rv >= 1)
{
while ((@array) = $sth->fetchrow_array)
{
$id = $array[0];
@temp = split(/\./,$id);
if (substr($id,0,3) eq 'Bm1') { $no_bm++;}
else
{
if (substr($id,0,2) ne 'At' && substr($id,0,3) ne 'BGI' && substr($id,0,3) ne 'CBG' &&
substr($id,0,3) ne 'ENS' && !($id =~ /-/) && substr($id,0,4) ne 'NEWS' &&
substr($id,0,3) ne 'cr0' && !($id =~ /_/) && substr($id,0,2) ne 'FB' && ($#temp==2 || $#temp == 3))
{
$no_ce++;
print "family $family ce $id\n";
}
}
}
}
print "family $family no_ce $no_ce no_bm $no_bm\n";
}
}
#------------------------------------------------------------------#
# READ ALL FAMILIES IN THE CURRENT RELEASE OF TREEFAM:
sub read_all_families1
{
my $dbh = $_[0];
my $table_w;
my $st;
my $sth;
my $rv;
my @array;
my $i;
my $AC;
my %SEEN = ();
for ($i = 1; $i <= 3; $i++)
{
if ($i == 1) { $table_w = 'familyA';}
elsif ($i == 2) { $table_w = 'familyB';}
elsif ($i == 3) { $table_w = 'familyC';}
$st = "SELECT AC from $table_w";
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n";
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr";
if ($rv >= 1)
{
while ((@array) = $sth->fetchrow_array)
{
$AC = $array[0];
$SEEN{$AC} = 1;
}
}
}
print "Read all families in the TreeFam version $release\n";
print STDERR "Read all families in the TreeFam version $release\n";
return(\%SEEN);
}
#------------------------------------------------------------------#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment