Skip to content

Instantly share code, notes, and snippets.

@avrilcoghlan
Created March 1, 2013 14:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avrilcoghlan/5064833 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/5064833 to your computer and use it in GitHub Desktop.
Perl script that, given a list of Schistosoma mansoni genes, connects to the TreeFam database to find out which families they are in
#!/usr/local/bin/perl
#
# Perl script find_treefam_for_schisto_gene2.pl
# Written by Avril Coghlan (a.coghlan@ucc.ie)
# 21-Mar-09.
#
# This perl script reads in a list of Schistosoma genes, and connects
# to the TreeFam mysql database to find out which family they are in.
#
# The command-line format is:
# % perl <find_treeFam_for_schisto_gene2.pl> version list
# where version is the version of the TreeFam database to use,
# list is the list of Schistosoma genes.
#
#------------------------------------------------------------------#
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS:
$num_args = $#ARGV + 1;
if ($num_args != 2)
{
print "Usage of find_treefam_for_schisto_gene2.pl\n\n";
print "perl find_treefam_for_schisto_gene2.pl <version> <list>\n";
print "where <version> is the version of the TreeFam database to use,\n";
print " <list> is the list of Schistosoma genes.\n";
print "For example, >perl find_treefam_for_schisto_gene2.pl 7 Table20.txt\n";
exit;
}
# DECLARE MYSQL USERNAME AND HOST:
use DBI;
# FIND THE VERSION OF THE TREEFAM DATABASE TO USE:
$version = $ARGV[0];
# FIND THE NAME OF THE INPUT LIST OF SCHISTOSOMA GENES:
$list = $ARGV[1];
#------------------------------------------------------------------#
# CONNECT TO THE TREEFAM DATABASE:
$database = "dbi:mysql:treefam_".$version.":db.treefam.org:3308";
$dbh = DBI->connect("$database", 'anonymous', '') || return
# READ IN THE LIST OF SCHISTOSOMA GENES:
print STDERR "Opening file $list...\n";
open(LIST,"$list") || die "ERROR: cannot open $list\n";
while(<LIST>)
{
$line = $_;
chomp $line;
@temp = split(/\s+/,$line);
$schisto = $temp[0];
# GET THE TREEFAM FAMILY FOR THIS SCHISTOSOMA TRANSCRIPT:
$fam = &get_fam($schisto,$dbh);
@fams = split(/\,/,$fam);
@fams = sort @fams;
if ($#fams > 0) { print STDERR "WARNING: $schisto: several families ($fam)\n";}
print "$schisto";
for ($i = 0; $i <= $#fams; $i++)
{
if ($fams[$i] ne 'none')
{
$link = "http://www.treefam.org/cgi-bin/TFinfo.pl?ac=".$fams[$i];
$hyperlink = "=HYPERLINK(\"$link\",\"$fams[$i]\")";
print " $hyperlink";
}
}
if ($#fams == 0 && $fams[0] eq 'none') { print " none";}
print "\n";
}
close(LIST);
$rc = $dbh->disconnect();
$rc = "";
#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
#------------------------------------------------------------------#
# GET THE TREEFAM FAMILY FOR A SCHISTOSOMA TRANSCRIPT:
sub get_fam
{
my $transcript = $_[0];
my $dbh = $_[1];
my $table_w;
my $gene;
my $st;
my $sth;
my $rv;
my @array;
my $fam = 'none';
my $fam2 = 'none';
my @temp;
my $transcript2;
my $gene2;
my %HAVE = ();
@temp = split(/\./,$transcript); # eg. Smp_028440.1
$gene = $temp[0]; # eg. Smp_028440
$table_w = 'fam_genes';
$st = "SELECT AC, ID from $table_w WHERE ID LIKE \'$gene\%\'";
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n";
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr";
if ($rv >= 1)
{
while ((@array) = $sth->fetchrow_array)
{
$transcript2 = $array[1];
@temp = split(/\./,$transcript2);
$gene2 = $temp[0];
if ($gene2 eq $gene)
{
# RECORD THE FAMILIES THAT THIS GENE APPEARS IN:
if (!($HAVE{$array[0]})) # IF WE HAVEN'T ALREADY RECORDED THIS FAMILY FOR THIS GENE:
{
$HAVE{$array[0]} = 1;
# DIFFERENT TRANSCRIPTS OF A GENE CAN HAVE BEST MATCHES TO DIFFERENT FAMILIES:
if ($fam eq 'none') { $fam = $array[0]; }
else
{
$fam = $fam.",".$array[0];
}
}
# RECORD THE FAMILIES FOR THIS TRANSCRIPT:
if ($transcript2 eq $transcript)
{
if ($fam2 eq 'none') { $fam2 = $array[0]; }
else
{
$fam2 = $fam2.",".$array[0];
}
}
}
}
}
# CHECK IF WE FOUND A FAMILY FOR THIS TRANSCRIPT:
if ($fam2 ne 'none')
{
return($fam2);
}
else
{
return($fam);
}
}
#------------------------------------------------------------------#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment