Skip to content

Instantly share code, notes, and snippets.

@josephwb
Last active April 27, 2021 13:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save josephwb/02157adac3eddfbf0f297a01b8cb768e to your computer and use it in GitHub Desktop.
Save josephwb/02157adac3eddfbf0f297a01b8cb768e to your computer and use it in GitHub Desktop.
like ape::keep.tip, but can also keep internal nodes (which become new tips)
library(ape);
# like keep.tip, but you can keep internal nodes toooooo
keep.nodes <- function (phy, nds) {
# stats
Ntip <- length(phy$tip.label);
NEWROOT <- Ntip + 1;
Nnode <- phy$Nnode;
hasNL <- !is.null(phy$node.label);
# first, determine nature of passed in nds
# does not support mix of indices and names. pick one!
# if labels are passed in rather than indices, convert
if (!is.numeric(nds)) {
mm <- match(nds, phy$tip.label);
if (any(is.na(mm))) {
idx <- which(is.na(mm));
if (!hasNL) {
stop("Error: nds do not all map to tip.labels, and node.label not present");
} else {
nl <- match(nds[idx], phy$node.label);
if (any(is.na(nl))) {
stop("Failed to match nds to node.label");
} else {
# extract node index from label
mm[idx] <- nl + Ntip;
}
}
}
nds <- mm;
}
# get all nodes to be retained (including mrcas)
allnds <- mrca(phy, full=T)[nds,];
# put in nicer form
allnds <- unique(as.numeric(allnds));
# find which edges to retain
keep <- which(phy$edge[,1] %in% allnds & phy$edge[,2] %in% allnds);
phy$edge <- phy$edge[keep,];
phy$edge.length <- phy$edge.length[keep];
phy$tip.label <- phy$tip.label[phy$edge[,2][phy$edge[,2] <= Ntip]]
## find the new terminal edges
TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1]);
## get the old No. of the nodes and tips that become tips:
oldNo.ofNewTips <- phy$edge[TERMS, 2];
n <- length(oldNo.ofNewTips);
phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2]);
# need new tip labels for internal nodes that become terminals
# this will use node.label if present, otherwise will be NewTip*
node2tip <- oldNo.ofNewTips[oldNo.ofNewTips > Ntip];
new.tip.label <-
if (!length(node2tip)) {
character(0);
} else {
if (!hasNL) {
paste0("NewTip", seq(1:length(node2tip)));
} else {
phy$node.label[node2tip - Ntip];
}
}
phy$tip.label <- c(phy$tip.label, new.tip.label);
# get rid of node labels that will not exist in new tree
if (!is.null(phy$node.label)) {
phy$node.label <- phy$node.label[unique(phy$edge[,1]) - Ntip];
}
# reindex everything
phy$Nnode <- dim(phy$edge)[1] - n + 1L;
newNb <- integer(Ntip + Nnode);
newNb[NEWROOT] <- n + 1L;
sndcol <- phy$edge[, 2] > n;
newNb[sort(phy$edge[sndcol, 2])] <- (n + 2):(n + phy$Nnode);
phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]];
phy$edge[, 1] <- newNb[phy$edge[, 1]];
return(phy);
}
#############################################################
## example data
# test tree string
tstr <- "(((((f:0.1,g:0.1):0.9,e:1.0):0.3,d:1.3):0.4,c:1.7):1.3,b:3.0);";
# tree with node labels (purposely left some blank for testing)
tstr2 <- "(((((f:0.1,g:0.1):0.9,e:1.0)h:0.3,d:1.3)i:0.4,c:1.7)a:1.3,b:3.0);";
phy <- read.tree(text=tstr);
phy2 <- read.tree(text=tstr2);
plot(phy); axisPhylo(); nodelabels();
# objective: keep b, c, and 9 (making this a terminal)
#############################################################
## example 1: tree with no existing node labels
# get the node ids for the terminal you want to keep
nds <- match(c("b", "c"), phy$tip.label);
# add in the internal nodes to keep. you can find these indices with getMRCA
nds <- c(nds, getMRCA(phy, c("d", "g")));
# tree with no node labels
fie <- keep.nodes(phy, nds);
plot(fie); axisPhylo();
#############################################################
## example 2: tree HAS existing node labels ##
# label version
nds2 <- c("b", "c", "i");
# now, one with node labels
fie2 <- keep.nodes(phy2, nds);
plot(fie2); axisPhylo();
#############################################################
# finally, if your tree does not have node labels, but you do not want
# to mess around with MRCAs, give it node labels
phy3 <- phy;
phy3$node.label <- paste0("Node", seq(1:phy3$Nnode));
# plot the tree to see labels
plot(phy3); axisPhylo(); nodelabels(text=phy3$node.label);
# let's do this
fie3 <- keep.nodes(phy3, c("b", "c", "Node3"));
plot(fie3); axisPhylo();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment