Last active
October 12, 2015 20:08
-
-
Save avrilcoghlan/4081016 to your computer and use it in GitHub Desktop.
Perl script to get all the protein sequences in a family in a particular version of the TreeFam database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/perl | |
=head1 NAME | |
get_treefam_family_seqs.pl | |
=head1 SYNOPSIS | |
get_treefam_family_seqs.pl treefam_version output outputdir core_only | |
where treefam_version is the version of TreeFam to use, | |
outputdir is the output directory for writing output files, | |
output is the output file name, | |
core_only says whether to take sequences from core species only (yes/no). | |
=head1 DESCRIPTION | |
This script makes a file with the protein sequences in each family in a | |
particular version (<treefam_version>) of the TreeFam database. The output | |
is written in file <output>. | |
=head1 VERSION | |
Perl script last edited 8-Nov-2012. | |
=head1 CONTACT | |
alc@sanger.ac.uk (Avril Coghlan) | |
=cut | |
# | |
# Perl script get_treefam_family_seqs.pl | |
# Written by Avril Coghlan (alc@sanger.ac.uk) | |
# 8-Nov-12. | |
# Last edited 8-Nov-2012. | |
# SCRIPT SYNOPSIS: get_treefam_family_seqs.pl: make a file with protein sequences in each family from a particular version of TreeFam | |
# | |
#------------------------------------------------------------------# | |
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS: | |
use strict; | |
use warnings; | |
use DBI; | |
my $num_args = $#ARGV + 1; | |
if ($num_args != 4) | |
{ | |
print "Usage of get_treefam_family_seqs.pl\n\n"; | |
print "perl get_treefam_family_seqs.pl <treefam_version> <output> <outputdir> <core_only>\n"; | |
print "where <treefam_version> is the version of TreeFam to use,\n"; | |
print " <output> is the output file name,\n"; | |
print " <outputdir> is the output directory for writing output files,\n"; | |
print " <core_only> says whether to take sequences from core species only (yes/no)\n"; | |
print "For example, >perl get_treefam_family_seqs.pl 8 treefam8_seqs /nfs/users/nfs_a/alc/Documents/GeneWise50Helminths yes\n"; | |
exit; | |
} | |
# FIND THE VERSION OF TREEFAM TO USE: | |
my $treefam_version = $ARGV[0]; | |
# FIND THE NAME FOR THE OUTPUT FILE: | |
my $output = $ARGV[1]; | |
# FIND THE DIRECTORY TO USE FOR OUTPUT FILES: | |
my $outputdir = $ARGV[2]; | |
# FIND OUT WHETHER TO TAKE SEQUENCES FROM CORE SPECIES ONLY: | |
my $coreonly = $ARGV[3]; | |
#------------------------------------------------------------------# | |
# TEST SUBROUTINES: | |
my $PRINT_TEST_DATA = 0; # SAYS WHETHER TO PRINT DATA USED DURING TESTING. | |
&test_print_error; | |
#------------------------------------------------------------------# | |
# RUN THE MAIN PART OF THE CODE: | |
&run_main_program($outputdir,$treefam_version,$output,$coreonly); | |
print STDERR "FINISHED.\n"; | |
#------------------------------------------------------------------# | |
# RUN THE MAIN PART OF THE CODE: | |
sub run_main_program | |
{ | |
my $outputdir = $_[0]; # DIRECTORY TO PUT OUTPUT FILES IN. | |
my $treefam_version = $_[1]; # THE VERSION OF THE TREFAM DATABASE TO USE. | |
my $output = $_[2]; # OUTPUT FILE NAME | |
my $coreonly = $_[3]; # SAYS WHETHER TO TAKE SEQUENCES FROM CORE SPECIES ONLY. | |
my $errorcode; # RETURNED AS 0 IF THERE IS NO ERROR. | |
my $errormsg; # RETURNED AS 'none' IF THERE IS NO ERROR. | |
my $familiesA; # AN ARRAY OF TREEFAM-A FAMILIES | |
my $familiesB; # AN ARRAY OF TREEFAM-B FAMILIES | |
my $ids_file; # FILE WITH THE IDS FOR THE SEQUENCES IN EACH FAMILY | |
my $genelist; # FILE WITH IDENTIFIERS OF SEQUENCES FROM SPECIES THAT WERE ARE INTERESTED IN | |
my $genelist2; # SHORTER FILE WITH IDENTIFIERS OF SEQUENCES FROM SPECIES THAT WERE ARE INTERESTED IN | |
# CHECK IF $coreonly IS 'yes' OR 'no': | |
if ($coreonly ne 'yes' && $coreonly ne 'no') | |
{ | |
$errormsg = "ERROR: run_main_program: core_only $coreonly (should be yes/no)\n"; | |
$errorcode = 2; # ERRORCODE=2 | |
($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); | |
} | |
# MAKE A FILE WITH THE IDENTIFIERS OF SEQUENCES FROM SPECIES THAT WE ARE INTERESTED IN: | |
if ($coreonly eq 'yes') | |
{ | |
($genelist,$genelist2,$errorcode,$errormsg) = &make_genelist($treefam_version,$outputdir); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
} | |
else | |
{ | |
$genelist = "none"; | |
$genelist2 = "none"; | |
} | |
# GET A LIST OF ALL THE TREEFAM-A FAMILIES: | |
($familiesA,$errorcode,$errormsg) = &get_family_list($treefam_version,"A"); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# GET A LIST OF ALL THE TREEFAM-B FAMILIES: | |
($familiesB,$errorcode,$errormsg) = &get_family_list($treefam_version,"B"); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# GET THE IDS THE SEQUENCES IN ALL FAMILIES: | |
print STDERR "Making ids_file...\n"; | |
($ids_file,$errorcode,$errormsg) = &get_ids_for_families($familiesA,$familiesB,$treefam_version,$outputdir,$genelist,$genelist2,$coreonly); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
# GET THE SEQUENCES IN ALL FAMILIES: | |
print STDERR "Getting the sequences for the families...\n"; | |
($errorcode,$errormsg) = &get_seqs_for_families($ids_file,$outputdir,$output,$treefam_version); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
} | |
#------------------------------------------------------------------# | |
# MAKE A FILE WITH THE IDENTIFIERS OF SEQUENCES FROM SPECIES THAT WE ARE INTERESTED IN: | |
sub make_genelist | |
{ | |
my $treefam_version = $_[0]; # VERSION OF THE TREEFAM DATABASE TO USE | |
my $outputdir = $_[1]; # DIRECTORY FOR WRITING OUTPUT FILES TO | |
my $genelist; # FILE FOR WRITING THE LIST OF SEQUENCES TO | |
my $random_number; # RANDOM NUMBER TO USE IN TEMPORARY FILE NAMES | |
my $database; # TREEFAM DATABASE TO CONNECT TO | |
my $dbh; # | |
my $table_w; # TABLE TO QUERY FROM THE DATABASE | |
my @species; # ARRAY OF SPECIES WE ARE INTERESTED IN | |
my $num_species; # NUMBER OF SPECIES WE ARE INTERESTED IN | |
my $species; # A SPECIES WE ARE INTERESTED IN | |
my $i; # | |
my $st; # | |
my $sth; # | |
my $rv; # | |
my @array; # | |
my $id; # IDENTIFIER FOR SEQUENCE | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = 'none';# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $genelist2; # A SECOND FILE OF SPECIES TO TAKE, WITH FEWER SPECIES | |
# OPEN A NEW FILE FOR WRITING THE LIST OF SEQUENCES: | |
$random_number = rand(); | |
$genelist = $outputdir."/tmp".$random_number; | |
open(GENELIST,">$genelist") || die "ERROR: make_genelist: cannot open $genelist\n"; | |
$random_number = rand(); | |
$genelist2 = $outputdir."/tmp".$random_number; | |
open(GENELIST2,">$genelist2") || die "ERROR: make_genelist2: cannot open $genelist2\n"; | |
# CONNECT TO THE DATABASE AND GET ALL SEQUENCES OF PARTICULAR SPECIES: | |
$database = "treefam_".$treefam_version; | |
$dbh = DBI->connect("dbi:mysql:$database:db.treefam.org:3308", 'anonymous', '') || return; | |
# GET THE SEQUENCES FROM CERTAIN SPECIES FROM THE DATABASE: | |
$table_w = "genes"; | |
@species = ("6239", # CAENORHABDITIS ELEGANS | |
"6238", # CAENOBHABDITIS BRIGGSAE | |
"6279", # BRUGIA MALAYI | |
"7227", # DROSOPHILA MELANOGASTER | |
"7234", # DROSOPHILA PERSIMILIS | |
"7165", # ANOPHELES GAMBIAE | |
"7159", # AEDES AEGYPTI | |
"6183", # SCHISTOSOMA MANSONI | |
"45351",# NEMATOSTELLA VECTENSIS | |
"7719", # CIONA INTESTINALIS | |
"9606", # HUMAN | |
"9598", # CHIMP | |
"9544", # MACAQUE | |
"10116",# RAT | |
"10090",# MOUSE | |
"9913", # COW | |
"9615", # DOG | |
"13616",# MONODELPHIS DOMESTICA | |
"9031", # CHICKEN | |
"7955", # ZEBRAFISH | |
"8364", # XENOPUS TROPICALIS | |
"99883",# TETRAODON NIGROVIRIDIS | |
"69293",# GASTEROSTEUS ACULEATUS (STICKLEBACK) | |
"4932", # SACCHAROMYCES CEREVISIAE | |
"4896", # SCHIZOSACCHAROMYCES POMBE | |
"3702", # ARABIDOPSIS THALIANA | |
"7668", # STRONGYLOCENTROTUS PURPURATUS | |
"44689");# DICTYOSTELIUM DISCOIDEUM | |
$num_species = $#species + 1; | |
for ($i = 1; $i <= $num_species; $i++) | |
{ | |
$species = $species[($i-1)]; | |
$st = "SELECT ID from $table_w WHERE TAX_ID=\'$species\'"; | |
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n"; | |
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr"; | |
if ($rv >= 1) | |
{ | |
while ((@array) = $sth->fetchrow_array) | |
{ | |
$id = $array[0]; | |
print GENELIST "$id\n"; | |
if ($species eq '6329' || $species eq '7227' || $species eq '6183' || # C. ELEGANS, D. MELANOGASTER, S. MANSONI | |
$species eq '45351'|| $species eq '7719' || $species eq '9606' || # N. VECTENSIS, C. INTESTINALIS, HUMAN | |
$species eq '4932' || # S. CEREVISIAE | |
$species eq '3702' || $species eq '7668' || $species eq '44689') # A. THALIANA, S. PURPURATUS, D. DICOIDEUM | |
{ | |
print GENELIST2 "$id\n"; | |
} | |
} | |
} | |
} | |
close(GENELIST); | |
close(GENELIST2); | |
$dbh->disconnect(); | |
return($genelist,$genelist2,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# GET A LIST OF ALL THE TREEFAM FAMILIES OF A PARTICULAR TYPE ("A" OR "B"): | |
# SUBROUTINE SYNOPSIS: get_family_list(): get a list of all TreeFam families of a particular type (A/B) | |
sub get_family_list | |
{ | |
my $treefam_version = $_[0]; # VERSION OF TREEFAM THAT WE WANT TO GET FAMILIES FOR, eg. 8 | |
my $type = $_[1]; # TYPE OF FAMILY ("A" OR "B") | |
my $dbh; # | |
my $database; # VERSION OF THE TREEFAM DATABASE, eg. treefam_8 | |
my $table_w; # THE DATABASE TABLE OF INTEREST | |
my $st; # | |
my $sth; # | |
my $rv; # | |
my @array; # | |
my $AC; # TREEFAM ACCESSION FOR A FAMILY | |
my @families; # ARRAY OF TREEFAM FAMILIES | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
if ($type ne 'A' && $type ne 'B') | |
{ | |
$errormsg = "ERROR: get_family_list: type $type\n"; | |
$errorcode = 1; # ERRORCODE=1 | |
return(\@families,$errorcode,$errormsg); | |
} | |
$database = "treefam_".$treefam_version; | |
# CONNECT TO THE DATABASE AND GET ALL FAMILIES OF THIS TYPE: | |
$dbh = DBI->connect("dbi:mysql:$database:db.treefam.org:3308", 'anonymous', '') || return(\@families,$errorcode,$errormsg); | |
# GET ALL THE FAMILIES OF TYPE $type: | |
if ($type eq 'A') { $table_w = "familyA"; } | |
elsif ($type eq 'B') { $table_w = "familyB";} | |
$st = "SELECT AC from $table_w"; | |
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n"; | |
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr"; | |
if ($rv >= 1) | |
{ | |
while ((@array) = $sth->fetchrow_array) | |
{ | |
$AC = $array[0]; | |
@families = (@families,$AC); | |
if ($#families % 100 == 0) { print STDERR "Read $#families families now (of type $type) ...\n"; } | |
} | |
} | |
$dbh->disconnect(); | |
return(\@families,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# GET THE SEQUENCES IN ALL FAMILIES: | |
# SUBROUTINE get_seqs_for_families(): get the sequences for TreeFam families from the database | |
sub get_seqs_for_families | |
{ | |
my $ids_file = $_[0]; # FILE WITH IDS OF SEQUENCES IN FAMILIES | |
my $outputdir = $_[1]; # DIRECTORY TO PUT OUTPUT FILES INTO | |
my $output = $_[2]; # OUTPUT FILE | |
my $treefam_version = $_[3]; # VERSION OF THE TREEFAM DATABASE TO USE | |
my $line; # | |
my @temp; # | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my $family = "none";# TREEFAM FAMILY ID. | |
my $id; # TREEFAM SEQUENCE ID. | |
my $database; # TREEFAM DATABASE TO CONNECT TO | |
my $dbh; # | |
my $table_w; # DATABASE TABLE TO USE | |
my $st; # | |
my $sth; # | |
my $rv; # | |
my $seq; # SEQUENCE FOR A TREEFAM ID. | |
my @array; # | |
# OPEN THE OUTPUT FILE: | |
$output = $outputdir."/".$output; | |
open(OUTPUT,">$output") || die "ERROR: get_seqs_for_families: cannot open $output\n"; | |
# CONNECT TO THE DATABASE: | |
$database = "treefam_".$treefam_version; | |
$dbh = DBI->connect("dbi:mysql:$database:db.treefam.org:3308", 'anonymous', '') || return; | |
$table_w = "aa_seq"; | |
# READ THROUGH THE SEQUENCES AND GET ALL THE SEQUENCES FOR ALL FAMILIES: | |
open(IDS_FILE,"$ids_file") || die "ERROR: get_seqs_for_families: cannot open $ids_file\n"; | |
while(<IDS_FILE>) | |
{ | |
$line = $_; | |
chomp $line; | |
@temp = split(/\s+/,$line); | |
if ($#temp == 0) | |
{ | |
if (substr($line,0,2) eq 'TF' && length($temp[0]) == 8 && $family eq 'none') | |
{ | |
$family = $temp[0]; | |
print STDERR "Getting sequences for family $family...\n"; | |
print OUTPUT "$line\n"; | |
} | |
elsif ($line eq '#END') | |
{ | |
$family = 'none'; | |
print OUTPUT "$line\n"; | |
} | |
else | |
{ | |
$id = $temp[0]; | |
$st = "SELECT SEQ from $table_w WHERE ID=\'$id\'"; | |
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n"; | |
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr"; | |
if ($rv >= 1) | |
{ | |
while ((@array) = $sth->fetchrow_array) | |
{ | |
$seq = $array[0]; | |
# REMOVE *s FROM THE SEQUENCE: | |
$seq =~ s/\*//g; | |
# REMOVE .s FROM THE SEQUENCE: | |
$seq =~ s/\*//g; | |
print OUTPUT ">$id\n"; | |
print OUTPUT "$seq\n"; | |
} | |
} | |
} | |
} | |
} | |
close(IDS_FILE); | |
close(OUTPUT); | |
return($errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# GET THE IDS FOR THE SEQUENCES FOR A FAMILY: | |
# SUBROUTINE SYNOPSIS: get_ids_for_family(): get the IDs for the sequences in a TreeFam family | |
sub get_ids_for_family | |
{ | |
my $family = $_[0]; # A TREEFAM FAMILY | |
my $treefam_version = $_[1]; # VERSION OF THE TREEFAM DATABASE TO USE | |
my $genelist = $_[2]; # LIST OF GENES FROM SPECIES THAT WE ARE INTERESTED IN | |
my $genelist2 = $_[3]; # SHORTER LIST OF GENES FROM SPECIES THAT WE ARE INTERESTED IN | |
my $ids_file = $_[4]; # FILE WITH IDENTIFIERS FOR A FAMILY | |
my $coreonly = $_[5]; # SAYS WHETHER TO ONY TAKE GENES FROM CORE SPECIES | |
my $database; # TREEFAM DATABASE TO USE | |
my $dbh; # | |
my $table_w; # DATABASE TABLE TO USE | |
my $st; # | |
my $sth; # | |
my $rv; # | |
my @array; # | |
my $id; # TREEFAM SEQUENCE ID. | |
my $on_genelist = 0; # SAYS WHETHERE A GENE APPEARS ON $genelist | |
my $line; # | |
my @ids; # ARRAY OF IDENTIFIERS THAT WE WANT TO TAKE | |
my $no_ids; # NUMBER OF IDENTIFIERS IN ARRAY @ids | |
my $i; # | |
my $errorcode = 0; # RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";# RETURNED AS 'none' IF THERE IS NO ERROR | |
my @temp; # | |
# CONNECT TO THE DATABASE AND GET ALL SEQUENCES FOR THE FAMILY: | |
$database = "treefam_".$treefam_version; | |
$dbh = DBI->connect("dbi:mysql:$database:db.treefam.org:3308", 'anonymous', '') || return; | |
# GET THE SEQUENCES FROM THE FAMILY FROM THE DATABASE: | |
$table_w = "fam_genes"; | |
$st = "SELECT ID from $table_w WHERE AC=\'$family\'"; | |
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n"; | |
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr"; | |
if ($rv >= 1) | |
{ | |
while ((@array) = $sth->fetchrow_array) | |
{ | |
$id = $array[0]; | |
if ($treefam_version == 8) | |
{ | |
@temp = split(/_/,$id); | |
$id = $temp[0]; | |
} | |
if ($coreonly eq 'yes') | |
{ | |
# CHECK IF $id APPEARS ON $genelist: | |
$on_genelist = 0; | |
open(TMP,"grep \'$id\' $genelist |"); | |
while(<TMP>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq $id) { $on_genelist = 1;} | |
} | |
close(TMP); | |
} | |
else | |
{ | |
$on_genelist = 1; | |
} | |
if ($on_genelist == 1) | |
{ | |
@ids = (@ids,$id); | |
} | |
} | |
} | |
# IF THERE ARE MORE THAN 100 SEQUENCES, ONLY TAKE THE SEQUENCES FROM A FEW SPECIES: | |
if ($#ids >= 100) | |
{ | |
$no_ids = $#ids + 1; | |
for ($i = 1; $i <= $no_ids; $i++) | |
{ | |
$id = $ids[($i-1)]; | |
if ($coreonly eq 'yes') | |
{ | |
# CHECK IF THIS IS IN THE SHORTER FILE OF IDs FROM SPECIES TO TAKE: | |
# CHECK IF $id APPEARS ON $genelist2: | |
$on_genelist = 0; | |
open(TMP,"grep \'$id\' $genelist2 |"); | |
while(<TMP>) | |
{ | |
$line = $_; | |
chomp $line; | |
if ($line eq $id) { $on_genelist = 1;} | |
} | |
close(TMP); | |
} | |
else | |
{ | |
$on_genelist = 1; | |
} | |
if ($on_genelist == 0) # RECORD THAT THIS $id IS NOT IN $genelist2: | |
{ | |
$ids[$i] = "NA"; | |
} | |
} | |
} | |
# IF THERE ARE <=5 SEQUENCES, TAKE ALL THE SEQUENCES FROM THE FAMILY: | |
if ($#ids <= 5) | |
{ | |
@ids = (); | |
$table_w = "fam_genes"; | |
$st = "SELECT ID from $table_w WHERE AC=\'$family\'"; | |
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n"; | |
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr"; | |
if ($rv >= 1) | |
{ | |
while ((@array) = $sth->fetchrow_array) | |
{ | |
$id = $array[0]; | |
if ($treefam_version == 8) | |
{ | |
@temp = split(/_/,$id); | |
$id = $temp[0]; | |
} | |
@ids = (@ids,$id); | |
} | |
} | |
} | |
# WRITE THE IDENTIFIERS OUT TO FILE $ids_file: | |
$no_ids = $#ids + 1; | |
open(IDS_FILE,">>$ids_file") || die "ERROR: get_ids_for_family: cannot open $ids_file\n"; | |
print IDS_FILE "$family\n"; | |
for ($i = 1; $i <= $no_ids; $i++) | |
{ | |
$id = $ids[($i-1)]; | |
if ($id ne 'NA') | |
{ | |
print IDS_FILE "$id\n"; | |
} | |
} | |
print IDS_FILE "#END\n"; | |
close(IDS_FILE); | |
$dbh->disconnect(); | |
return($errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# GET THE IDS FOR THE SEQUENCES FOR ALL FAMILIES: | |
# SUBROUTINE get_ids_for_families(): get the IDs for sequences in all TreeFam families | |
sub get_ids_for_families | |
{ | |
my $familiesA = $_[0]; ## ARRAY OF TREEFAM-A FAMILIES | |
my $familiesB = $_[1]; ## ARRAY OF TREEFAM-B FAMILIES | |
my $treefam_version = $_[2]; ## VERSION OF TREEFAM TO USE | |
my $outputdir = $_[3]; ## DIRECTORY FOR WRITING OUTPUT FILES TO | |
my $genelist = $_[4]; ## FILE WITH GENES FROM THE SPECIES WE WANT TO TAKE | |
my $genelist2 = $_[5]; ## SHORTER FILE WITH GENES FROM THE SPECIES WE WANT TO TAKE | |
my $coreonly = $_[6]; ## SAYS WHETHER TO ONLY TAKE GENES FROM CORE SPECIES | |
my $errorcode = 0; ## RETURNED AS 0 IF THERE IS NO ERROR | |
my $errormsg = "none";## RETURNED AS 'none' IF THERE IS NO ERROR | |
my $num_familiesA; ## NUMBER OF TREEFAM-A FAMILIES | |
my $num_familiesB; ## NUMBER OF TREEFAM-B FAMILIES | |
my $i; ## | |
my $family; ## TREEFAM FAMILY | |
my $ids_file; ## FILE WITH THE IDS OF SEQUENCES IN EACH FAMILY | |
my $random_number; ## RANDOM NUMBER TO USE IN TEMPORARY FILE NAME | |
# OPEN A FILE WITH THE IDS OF SEQUENCES IN EACH FAMILY: | |
$random_number = rand(); | |
$ids_file = $outputdir."/tmp".$random_number; | |
open(IDS_FILE,">$ids_file") || die "ERROR: get_ids_for_families: cannot open $ids_file\n"; | |
close(IDS_FILE); | |
# GET THE IDs FOR EACH FAMILY FROM THE DATABASE: | |
$num_familiesA = @$familiesA; | |
$num_familiesB = @$familiesB; | |
for ($i = 1; $i <= $num_familiesA; $i++) | |
{ | |
$family = $familiesA->[($i-1)]; | |
print STDERR "Getting ids for family $family...\n"; | |
($errorcode,$errormsg)= &get_ids_for_family($family,$treefam_version,$genelist,$genelist2,$ids_file,$coreonly); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
} | |
for ($i = 1; $i <= $num_familiesB; $i++) | |
{ | |
$family = $familiesB->[($i-1)]; | |
print STDERR "Getting ids for family $family...\n"; | |
($errorcode,$errormsg)= &get_ids_for_family($family,$treefam_version,$genelist,$genelist2,$ids_file,$coreonly); | |
if ($errorcode != 0) { ($errorcode,$errormsg) = &print_error($errormsg,$errorcode,0); } | |
} | |
return($ids_file,$errorcode,$errormsg); | |
} | |
#------------------------------------------------------------------# | |
# TEST &print_error | |
sub test_print_error | |
{ | |
my $errormsg; # RETURNED AS 'none' FROM A FUNCTION IF THERE WAS NO ERROR | |
my $errorcode; # RETURNED AS 0 FROM A FUNCTION IF THERE WAS NO ERROR | |
($errormsg,$errorcode) = &print_error(45,45,1); | |
if ($errorcode != 12) { print STDERR "ERROR: test_print_error: failed test1\n"; exit;} | |
($errormsg,$errorcode) = &print_error('My error message','My error message',1); | |
if ($errorcode != 11) { print STDERR "ERROR: test_print_error: failed test2\n"; exit;} | |
($errormsg,$errorcode) = &print_error('none',45,1); | |
if ($errorcode != 13) { print STDERR "ERROR: test_print_error: failed test3\n"; exit;} | |
($errormsg,$errorcode) = &print_error('My error message', 0, 1); | |
if ($errorcode != 13) { print STDERR "ERROR: test_print_error: failed test4\n"; exit;} | |
} | |
#------------------------------------------------------------------# | |
# PRINT OUT AN ERROR MESSAGE AND EXIT. | |
sub print_error | |
{ | |
my $errormsg = $_[0]; # THIS SHOULD BE NOT 'none' IF AN ERROR OCCURRED. | |
my $errorcode = $_[1]; # THIS SHOULD NOT BE 0 IF AN ERROR OCCURRED. | |
my $called_from_test = $_[2]; # SAYS WHETHER THIS WAS CALLED FROM test_print_error OR NOT | |
if ($errorcode =~ /[A-Z]/ || $errorcode =~ /[a-z]/) | |
{ | |
if ($called_from_test == 1) | |
{ | |
$errorcode = 11; $errormsg = "ERROR: print_error: the errorcode is $errorcode, should be a number.\n"; # ERRORCODE=11 | |
return($errormsg,$errorcode); | |
} | |
else | |
{ | |
print STDERR "ERROR: print_error: the errorcode is $errorcode, should be a number.\n"; | |
exit; | |
} | |
} | |
if (!($errormsg =~ /[A-Z]/ || $errormsg =~ /[a-z]/)) | |
{ | |
if ($called_from_test == 1) | |
{ | |
$errorcode = 12; $errormsg = "ERROR: print_error: the errormessage $errormsg does not seem to contain text.\n"; # ERRORCODE=12 | |
return($errormsg,$errorcode); | |
} | |
else | |
{ | |
print STDERR "ERROR: print_error: the errormessage $errormsg does not seem to contain text.\n"; | |
exit; | |
} | |
} | |
if ($errormsg eq 'none' || $errorcode == 0) | |
{ | |
if ($called_from_test == 1) | |
{ | |
$errorcode = 13; $errormsg = "ERROR: print_error: errormsg $errormsg, errorcode $errorcode.\n"; # ERRORCODE=13 | |
return($errormsg,$errorcode); | |
} | |
else | |
{ | |
print STDERR "ERROR: print_error: errormsg $errormsg, errorcode $errorcode.\n"; | |
exit; | |
} | |
} | |
else | |
{ | |
print STDERR "$errormsg"; | |
exit; | |
} | |
return($errormsg,$errorcode); | |
} | |
#------------------------------------------------------------------# | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment