Skip to content

Instantly share code, notes, and snippets.

@mdondrup
Last active September 11, 2023 07:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mdondrup/d4e3d869bb83be14687bfb88172f26ae to your computer and use it in GitHub Desktop.
Save mdondrup/d4e3d869bb83be14687bfb88172f26ae to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
#### This is now becoming a flexible and chaotic Taxonomy tool ;)
use strict;
use warnings;
use Bio::DB::Taxonomy;
use Bio::Taxonomy::Taxon;
use Carp qw |confess croak|;
use Cwd qw |cwd|;
my @nodes = ();
use Getopt::Std;
our($opt_d, $opt_t, $opt_q, $opt_f, $opt_g, $opt_G, $opt_R); # t for taxids only (for ITOL)
getopts('d:tqf:g:GR');
my $idx_dir = $opt_d || cwd();
my ($nodefile,$namesfile) = ('nodes.dmp','names.dmp');
my $db;
if (-f $idx_dir.$nodefile and -f $idx_dir.$namesfile) {
$db = new Bio::DB::Taxonomy(-source => 'flatfile',
-nodesfile =>$idx_dir. $nodefile,
-namesfile => $idx_dir.$namesfile);
} else {
warn "nodesfile or namesfile not found, using entrez online data\n";
$db = new Bio::DB::Taxonomy((-source => 'entrez'));
}
#my $arthropod = $db->get_Taxonomy_Node(-name=>'Arthropoda');
#my $lep = $db->get_Taxonomy_Node(-name=>'Lepeophtheirus');
my %gi_taxa = ();
my @qnodes = ();
my @taxa = ();
### Allow reading taxon list from text file. One Taxon per line.
if ($opt_f) {
open TAX, $opt_f or die "couldn't open taxon file $!";
while (<TAX>) {
chomp;
push @taxa, $_;
}
} else {
@taxa = @ARGV;
}
foreach (@taxa) {
my $node1;
if (/^\d+$/){
$node1 = $db->get_Taxonomy_Node(-taxonid => $_);
} else {
s/[^\w\d\s]/ /g; $node1 = $db->get_Taxonomy_Node(-name => $_);
}# look for the name
die "$_ not found\n" unless ref $node1;
if ($opt_t) {
print ($node1->id. "\n");
next;
} elsif ($opt_q) {
push @qnodes, "txid".$node1->id."[Organism]";
if (scalar @qnodes == scalar @ARGV) {
print join (" OR ", @qnodes ), "\n";
}
next;
} elsif ($opt_g) {
## make a gi list instead by storing the taxids in the hash
$gi_taxa{$node1->id} = ();
if ($opt_R) {
## add all descendents also:
map {$gi_taxa{$_->id} = ()} $node1->get_Descendents();
}
next;
} else {
print "\n######################### $_ ##############################\n";
unless (ref $node1) { (print " '$_' not found\n"); next; };
my $tree1 = Bio::Tree::Tree->new(-node => $node1);
my @lineage = $tree1->get_lineage_nodes($node1);
@lineage = map {$_->scientific_name} @lineage;
my $line = join ':', @lineage;
my $phylum = $tree1->find_node(-rank => 'phylum');
print "ID: ". $node1->id, " Sci.name: ", $node1->scientific_name, "\n [", join " ", ($node1->common_name), "]\n Phylum: ".(($phylum) ? $phylum->scientific_name : "NA"). "\n $line". "\n";
push @nodes, $node1;
}
}
if ($opt_g) {
### open gi to tax mapping
open GI, $opt_g or die "couldn't open GI file $!";
GI: while (<GI>) {
chomp;
## adapted to the 4-column format, but uses only the accessions
my ($acc, $accv, $tax, $gi) = split;
next GI unless exists $gi_taxa{$tax}; ## store only relevant taxa
push @{$gi_taxa{$tax}}, $acc;
}
print STDERR "read gis for ". scalar keys (%gi_taxa). " taxa.\n";
foreach my $taxi (keys %gi_taxa) {
if ($opt_G) {
print "Found ". scalar @{$gi_taxa{$taxi}}. " for $taxi\n";
} else {
print (join "\n", @{$gi_taxa{$taxi}},"\n") ;
}
}
}
exit unless @nodes >= 2 ;
my $tree = Bio::Tree::Tree->new();
my $lca = $tree->get_lca(@nodes);
print "\n############################ LCA #################################\n";
print "no LCA!" unless ref $lca;
print "LCA: ".$lca-> ncbi_taxid." ". $lca->scientific_name()."\n";
# print "LCA-is subtree of Arthropoda: ".(allSubTax($arthropod, @nodes) ? "yes":"no"), "\n";
#my $tree2 = Bio::Tree::Tree->new(-node=>$lca);
my @lineage = $tree->get_lineage_nodes($lca);
confess "empty lineage \n" unless @lineage;
my @line = map {$_->scientific_name} (@lineage,$lca);
print "Lineage of LCA: ", ( join ":", @line), "\n";
sub allSubTax {
#my $taxname = shift;
my $tree = Bio::Tree::Tree->new();
my $cPhylum = shift;
my $isCPhylum = $tree->get_lca(@_, $cPhylum);
return ($isCPhylum->id == $cPhylum->id);
}
__END__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment