Skip to content

Instantly share code, notes, and snippets.

@benfulton
Created January 26, 2022 22:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benfulton/dbb1be9edd3a023093fb5a6cee3a03af to your computer and use it in GitHub Desktop.
Save benfulton/dbb1be9edd3a023093fb5a6cee3a03af to your computer and use it in GitHub Desktop.
Reverse engineering the R APE package tree reader, to determine how the nodes are numbered
/* tree_build.c 2020-02-12 */
/* Copyright 2008-2020 Emmanuel Paradis, 2017 Klaus Schliep */
/* This file is part of the R-package `ape'. */
/* See the file ../COPYING for licensing issues. */
//#include <R.h>
//#include <Rinternals.h>
#include <cmath>
#include <string>
#include <vector>
#include <iostream>
using namespace std;
struct tree_data
{
vector<int> Nnode, edge;
vector<double> edge_length;
vector<string> node_label;
vector<string> tip_label;
int nnode = 0;
vector<double> root_edge;
};
void extract_portion_Newick(string x, int a, int b, char *y)
{
int i, j;
for (i = a, j = 0; i <= b; i++, j++) y[j] = x[i];
y[j] = '\0';
}
void decode_internal_edge(string x, int a, int b, char *lab, double *w)
{
int co = a;
char *endstr, str[100];
while (x[co] != ':' && co <= b) co++;
if (a == co) lab[0] = '\0'; /* if no node label */
else extract_portion_Newick(x, a, co - 1, lab);
if (co < b) {
extract_portion_Newick(x, co + 1, b, str);
*w = strtod(str, &endstr);
} else *w = NAN;
}
void decode_terminal_edge(string x, int a, int b, char *tip, double *w)
{
int co = a;
char *endstr, str[100];
while (x[co] != ':' && co <= b) co++;
extract_portion_Newick(x, a, co - 1, tip);
if (co < b) {
extract_portion_Newick(x, co + 1, b, str);
*w = strtod(str, &endstr);
} else *w = NAN;
}
#define ADD_INTERNAL_EDGE \
phy.edge[j] = curnode; \
phy.edge[j + nedge] = curnode = ++node; \
stack_internal[k++] = j; \
j++
#define GO_DOWN \
decode_internal_edge(x, ps + 1, pt - 1, lab, &tmpd); \
phy.node_label[curnode - 1 - ntip] = lab; \
l = stack_internal[--k]; \
phy.edge_length[l] = tmpd; \
curnode = phy.edge[l]
void add_terminal_edge_tiplabel(tree_data& phy, int&curtip, int&j, int curnode, string x, int pr, int ps, char *tip, int nedge)
{
double tmpd;
phy.edge[j] = curnode;
decode_terminal_edge(x, pr + 1, ps - 1, tip, &tmpd);
if (phy.tip_label.size() < curtip) phy.tip_label.resize(curtip);
phy.tip_label[curtip - 1] = tip;
phy.edge[j + nedge] = curtip;
phy.edge_length[j] = tmpd;
curtip++;
j++;
}
#define ADD_TERMINAL_EDGE_TIPLABEL add_terminal_edge_tiplabel(phy, curtip, j, curnode, x, pr, ps, tip, nedge)
tree_data treeBuild(string nwk)
{
string x;
int n, i, ntip = 1, nleft = 0, nright = 0, nedge, curnode, node, j, nsk = 0, ps, pr, pt, l, k, stack_internal[10000], curtip = 1;
double *el, tmpd;
char lab[512], tip[512];
tree_data phy;
phy.tip_label.resize(1);
x = nwk;
n = x.size();
vector<int> skeleton(n);
for (i = 0; i < n; i++) {
if (x[i] == '(') {
skeleton[nsk] = i;
nsk++;
nleft++;
continue;
}
if (x[i] == ',') {
skeleton[nsk] = i;
nsk++;
ntip++;
continue;
}
if (x[i] == ')') {
skeleton[nsk] = i;
nsk++;
nright++;
phy.nnode++;
}
}
if (nleft != nright) perror("numbers of left and right parentheses in Newick string not equaln");
nedge = ntip + phy.nnode - 1;
phy.Nnode.resize(1);
phy.edge.resize(nedge*2);
phy.edge_length.resize(nedge);
phy.node_label.resize(phy.nnode);
phy.Nnode[0] = phy.nnode;
curnode = node = ntip + 1;
k = j = 0;
for (i = 1; i < nsk - 1; i++) {
ps = skeleton[i];
if (x[ps] == '(') {
ADD_INTERNAL_EDGE;
continue;
}
pr = skeleton[i - 1];
if (x[ps] == ',') {
if (x[pr] != ')') {
ADD_TERMINAL_EDGE_TIPLABEL;
}
continue;
}
if (x[ps] == ')') {
pt = skeleton[i + 1];
if (x[pr] == ',') {
ADD_TERMINAL_EDGE_TIPLABEL;
GO_DOWN;
continue;
}
if (x[pr] == '(') {
ADD_TERMINAL_EDGE_TIPLABEL;
GO_DOWN;
continue;
}
if (x[pr] == ')') {
GO_DOWN;
}
}
}
pr = skeleton[nsk - 2];
ps = skeleton[nsk - 1];
if (x[pr] == ',' && x[ps] == ')') {
ADD_TERMINAL_EDGE_TIPLABEL;
}
if (ps < n - 2) {
i = ps + 1;
while (i < n - 2 && x[i] != ':') i++;
if (i < n - 2) {
decode_internal_edge(x, ps + 1, n - 2, lab, &tmpd);
phy.root_edge.push_back(tmpd);
phy.node_label[0] = lab;
} else {
extract_portion_Newick(x, ps + 1, n - 2, lab);
phy.node_label[0] = lab;
}
}
return phy;
}
int main()
{
// auto t = treeBuild("(A:1,B:1):1");
// auto t = treeBuild("((((cat:68.710687,horse:68.710687):4.566771,cow:73.277458):20.722542,(((((chimp:4.444178,human:4.444178):6.682660,orang:11.126837):2.285866,gibbon:13.412704):7.211528,(macaque:4.567239,baboon:4.567239):16.056993):16.060691,marmoset:36.684923):57.315077)mancat:38.738115,(rat:36.302467,mouse:36.302467):96.435648)");
auto t = treeBuild("((Parasteatoda_tepidariorum<285>_1:138.531,Stegodyphus_mimosarum<284>_1:138.531)<297>_1:187.093,Centruroides_sculpturatus<296>_3:325.625)<313>_1:46.2175;");
cout << "Tips: " << t.tip_label.size() << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment