Skip to content

Instantly share code, notes, and snippets.

@avrilcoghlan
Created March 1, 2013 16:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avrilcoghlan/5065840 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/5065840 to your computer and use it in GitHub Desktop.
Perl script that prints out all the genes in a particular TreeFam family.
#!/usr/local/bin/perl
#
# Perl script treefam_4_genes.pl
# Written by Avril Coghlan (alc@sanger.ac.uk)
# 31-Oct-07.
#
# For the TreeFam project.
#
# This perl script prints out all the genes in a TreeFam-4 family.
#
# The command-line format is:
# % perl <treefam_4_genes.pl> family
# where family is the family identifier.
#
#------------------------------------------------------------------#
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS:
$num_args = $#ARGV + 1;
if ($num_args != 1)
{
print "Usage of treefam_4_genes.pl\n\n";
print "perl treefam_4_genes.pl <family>\n";
print "where <family> is the family identifier.\n";
print "For example, >perl -w treefam_4_genes.pl TF101073\n";
exit;
}
# READ IN MY PERL MODULES:
BEGIN {
unshift (@INC, '/nfs/team71/phd/alc/perl/modules');
}
use lib "/nfs/team71/phd/alc/perl/treefam/Treefam_api_3may07/";
use DBI;
use Treefam::DBConnection;
# FIND THE NAME OF THE INPUT FAMILY:
$family_id = $ARGV[0];
#------------------------------------------------------------------#
# GET ALL THE GENES IN THIS FAMILY:
$dbc = Treefam::DBConnection->new();
$famh = $dbc->get_FamilyHandle();
$family = $famh->get_by_id($family_id);
if (!(defined($family)))
{
print "Do not have $family_id in the database...\n";
exit;
}
my $tree = $family->get_tree('clean');
# FIRST PRINT ALL THE GENES IN THE FAMILY:
my @leaves = $tree->get_leaves;
foreach my $leaf(@leaves)
{
if (defined($leaf->sequence_id())) # FOR SOME LEAVES, WE DON'T SEE TO HAVE
# A SEQUENCE ID, eg. SEE RICE GENE IN http://www.treefam.org/cgi-bin/TFinfo.pl?ac=TF101025
{
my $id = $leaf->sequence_id();
# GET THE ENSEMBL/ORIGINAL GENE NAME:
my $gene_handle = $dbc->get_GeneHandle();
if (defined($gene_handle->get_by_sequence_id($id))) { $gene = $gene_handle->get_by_sequence_id($id);}
else { print STDERR "ERROR: cannot get gene for id $id\n"; exit; }
if (defined($gene->ID)) { $gene_id = $gene->ID;}
else { $gene_id = "unknown";}
print "HAVE GENE $gene_id\n";
}
}
#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment