Skip to content

Instantly share code, notes, and snippets.

Created December 19, 2012 01:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/4333695 to your computer and use it in GitHub Desktop.
Save anonymous/4333695 to your computer and use it in GitHub Desktop.
C program to get the neighbor of a node from a tree in the Newick format. See also http://www.biostars.org/p/59246/.
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#define KNERR_MISSING_LEFT 0x01
#define KNERR_MISSING_RGHT 0x02
#define KNERR_BRACKET 0x04
#define KNERR_COLON 0x08
typedef struct {
int parent, n;
int *child;
char *name;
double d;
} knhx1_t;
typedef struct __kstring_t {
size_t l, m;
char *s;
} kstring_t;
typedef struct {
int error, n, max;
knhx1_t *node;
} knaux_t;
static inline char *add_node(const char *s, knaux_t *aux, int x)
{
char *p, *nbeg, *nend = 0;
knhx1_t *r;
if (aux->n == aux->max) {
aux->max = aux->max? aux->max<<1 : 8;
aux->node = (knhx1_t*)realloc(aux->node, sizeof(knhx1_t) * aux->max);
}
r = aux->node + (aux->n++);
r->n = x; r->parent = -1;
for (p = (char*)s, nbeg = p, r->d = -1.0; *p && *p != ',' && *p != ')'; ++p) {
if (*p == '[') {
if (nend == 0) nend = p;
do ++p; while (*p && *p != ']');
if (*p == 0) {
aux->error |= KNERR_BRACKET;
break;
}
} else if (*p == ':') {
if (nend == 0) nend = p;
r->d = strtod(p + 1, &p);
--p;
} else if (!isgraph(*p)) if (nend == 0) nend = p;
}
if (nend == 0) nend = p;
if (nend != nbeg) {
r->name = (char*)calloc(nend - nbeg + 1, 1);
strncpy(r->name, nbeg, nend - nbeg);
} else r->name = strdup("");
return p;
}
knhx1_t *kn_parse(const char *nhx, int *_n, int *_error)
{
char *p;
int *stack, top, max;
knaux_t *aux;
knhx1_t *ret;
#define __push_back(y) do { \
if (top == max) { \
max = max? max<<1 : 16; \
stack = (int*)realloc(stack, sizeof(int) * max); \
} \
stack[top++] = (y); \
} while (0) \
stack = 0; top = max = 0;
p = (char*)nhx;
aux = (knaux_t*)calloc(1, sizeof(knaux_t));
while (*p) {
while (*p && !isgraph(*p)) ++p;
if (*p == 0) break;
if (*p == ',') ++p;
else if (*p == '(') {
__push_back(-1);
++p;
} else if (*p == ')') {
int x = aux->n, m, i;
for (i = top - 1; i >= 0; --i)
if (stack[i] < 0) break;
m = top - 1 - i;
p = add_node(p + 1, aux, m);
aux->node[x].child = (int*)calloc(m, sizeof(int));
for (i = top - 1, m = m - 1; m >= 0; --m, --i) {
aux->node[x].child[m] = stack[i];
aux->node[stack[i]].parent = x;
}
top = i;
__push_back(x);
} else {
__push_back(aux->n);
p = add_node(p, aux, 0);
}
}
*_n = aux->n;
*_error = aux->error;
ret = aux->node;
free(aux); free(stack);
return ret;
}
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
static inline int kputsn(const char *p, int l, kstring_t *s)
{
if (s->l + l + 1 >= s->m) {
s->m = s->l + l + 2;
kroundup32(s->m);
s->s = (char*)realloc(s->s, s->m);
}
memcpy(s->s + s->l, p, l);
s->l += l; s->s[s->l] = 0;
return l;
}
static inline int kputc(int c, kstring_t *s)
{
if (s->l + 1 >= s->m) {
s->m = s->l + 2;
kroundup32(s->m);
s->s = (char*)realloc(s->s, s->m);
}
s->s[s->l++] = c; s->s[s->l] = 0;
return c;
}
static void format_node_recur(const knhx1_t *node, const knhx1_t *p, kstring_t *s, char *numbuf)
{
if (p->n) {
int i;
kputc('(', s);
for (i = 0; i < p->n; ++i) {
if (i) kputc(',', s);
format_node_recur(node, &node[p->child[i]], s, numbuf);
}
kputc(')', s);
if (p->name) kputsn(p->name, strlen(p->name), s);
if (p->d >= 0) {
sprintf(numbuf, ":%g", p->d);
kputsn(numbuf, strlen(numbuf), s);
}
} else kputsn(p->name, strlen(p->name), s);
}
void kn_format(const knhx1_t *node, int root, kstring_t *s) // TODO: get rid of recursion
{
char numbuf[128];
format_node_recur(node, &node[root], s, numbuf);
}
int main(int argc, char *argv[])
{
int i, j, n, error;
knhx1_t *node;
kstring_t str;
if (argc < 3) {
fprintf(stderr, "Usage: get-nei <in.nh> <leaf-name>\n");
return 1;
}
node = kn_parse(argv[1], &n, &error);
str.l = str.m = 0; str.s = 0;
for (i = 0; i < n; ++i) {
const knhx1_t *p = &node[i], *q = &node[p->parent], *p0 = p;
if (strcmp(p->name, argv[2]) == 0) {
for (j = 0; j < q->n; ++j) {
p = &node[q->child[j]];
if (p == p0) continue;
kn_format(node, p - node, &str);
printf("%s\t%s\t%s\n", p0->name, q->name, str.s);
}
}
}
// TODO: deallocate
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment