josephwb/keep.nodes.R

## keep.nodes.R
library(ape);

# like keep.tip, but you can keep internal nodes toooooo
keep.nodes <- function (phy, nds) {
  # stats
  Ntip <- length(phy$tip.label);
  NEWROOT <- Ntip + 1;
  Nnode <- phy$Nnode;
  hasNL <- !is.null(phy$node.label);

  # first, determine nature of passed in nds
  # does not support mix of indices and names. pick one!
  # if labels are passed in rather than indices, convert
  if (!is.numeric(nds)) {
    mm <- match(nds, phy$tip.label);
    if (any(is.na(mm))) {
      idx <- which(is.na(mm));
      if (!hasNL) {
        stop("Error: nds do not all map to tip.labels, and node.label not present");
      } else {
         nl <- match(nds[idx], phy$node.label);
         if (any(is.na(nl))) {
           stop("Failed to match nds to node.label");
         } else {
           # extract node index from label
           mm[idx] <- nl + Ntip;
         }
      }
    }
    nds <- mm;
  }

  # get all nodes to be retained (including mrcas)
  allnds <- mrca(phy, full=T)[nds,];
  # put in nicer form
  allnds <- unique(as.numeric(allnds));
  # find which edges to retain
  keep <- which(phy$edge[,1] %in% allnds & phy$edge[,2] %in% allnds);
  phy$edge <- phy$edge[keep,];
  phy$edge.length <- phy$edge.length[keep];
  phy$tip.label <- phy$tip.label[phy$edge[,2][phy$edge[,2] <= Ntip]]

  ## find the new terminal edges
  TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1]);
  ## get the old No. of the nodes and tips that become tips:
  oldNo.ofNewTips <- phy$edge[TERMS, 2];
  n <- length(oldNo.ofNewTips);
  phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2]);

  # need new tip labels for internal nodes that become terminals
  # this will use node.label if present, otherwise will be NewTip*
  node2tip <- oldNo.ofNewTips[oldNo.ofNewTips > Ntip];
  new.tip.label <-
    if (!length(node2tip)) {
      character(0);
    } else {
      if (!hasNL) {
        paste0("NewTip", seq(1:length(node2tip)));
      } else {
        phy$node.label[node2tip - Ntip];
      }
    }
  phy$tip.label <- c(phy$tip.label, new.tip.label);

  # get rid of node labels that will not exist in new tree
  if (!is.null(phy$node.label)) {
    phy$node.label <- phy$node.label[unique(phy$edge[,1]) - Ntip];
  }

  # reindex everything
  phy$Nnode <- dim(phy$edge)[1] - n + 1L;
  newNb <- integer(Ntip + Nnode);
  newNb[NEWROOT] <- n + 1L;
  sndcol <- phy$edge[, 2] > n;
  newNb[sort(phy$edge[sndcol, 2])] <- (n + 2):(n + phy$Nnode);
  phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]];
  phy$edge[, 1] <- newNb[phy$edge[, 1]];
  return(phy);
}

#############################################################
## example data

# test tree string
tstr <- "(((((f:0.1,g:0.1):0.9,e:1.0):0.3,d:1.3):0.4,c:1.7):1.3,b:3.0);";
# tree with node labels (purposely left some blank for testing)
tstr2 <- "(((((f:0.1,g:0.1):0.9,e:1.0)h:0.3,d:1.3)i:0.4,c:1.7)a:1.3,b:3.0);";

phy <- read.tree(text=tstr);
phy2 <- read.tree(text=tstr2);
plot(phy); axisPhylo(); nodelabels();

# objective: keep b, c, and 9 (making this a terminal)

#############################################################
## example 1: tree with no existing node labels

# get the node ids for the terminal you want to keep
nds <- match(c("b", "c"), phy$tip.label);
# add in the internal nodes to keep. you can find these indices with getMRCA
nds <- c(nds, getMRCA(phy, c("d", "g")));

# tree with no node labels
fie <- keep.nodes(phy, nds);
plot(fie); axisPhylo();

#############################################################
## example 2: tree HAS existing node labels ##

# label version
nds2 <- c("b", "c", "i");

# now, one with node labels
fie2 <- keep.nodes(phy2, nds);
plot(fie2); axisPhylo();

#############################################################
# finally, if your tree does not have node labels, but you do not want
# to mess around with MRCAs, give it node labels
phy3 <- phy;
phy3$node.label <- paste0("Node", seq(1:phy3$Nnode));
# plot the tree to see labels
plot(phy3); axisPhylo(); nodelabels(text=phy3$node.label);
# let's do this
fie3 <- keep.nodes(phy3, c("b", "c", "Node3"));
plot(fie3); axisPhylo();
	library(ape);

	# like keep.tip, but you can keep internal nodes toooooo
	keep.nodes <- function (phy, nds) {
	# stats
	Ntip <- length(phy$tip.label);
	NEWROOT <- Ntip + 1;
	Nnode <- phy$Nnode;
	hasNL <- !is.null(phy$node.label);

	# first, determine nature of passed in nds
	# does not support mix of indices and names. pick one!
	# if labels are passed in rather than indices, convert
	if (!is.numeric(nds)) {
	mm <- match(nds, phy$tip.label);
	if (any(is.na(mm))) {
	idx <- which(is.na(mm));
	if (!hasNL) {
	stop("Error: nds do not all map to tip.labels, and node.label not present");
	} else {
	nl <- match(nds[idx], phy$node.label);
	if (any(is.na(nl))) {
	stop("Failed to match nds to node.label");
	} else {
	# extract node index from label
	mm[idx] <- nl + Ntip;
	}
	}
	}
	nds <- mm;
	}

	# get all nodes to be retained (including mrcas)
	allnds <- mrca(phy, full=T)[nds,];
	# put in nicer form
	allnds <- unique(as.numeric(allnds));
	# find which edges to retain
	keep <- which(phy$edge[,1] %in% allnds & phy$edge[,2] %in% allnds);
	phy$edge <- phy$edge[keep,];
	phy$edge.length <- phy$edge.length[keep];
	phy$tip.label <- phy$tip.label[phy$edge[,2][phy$edge[,2] <= Ntip]]

	## find the new terminal edges
	TERMS <- !(phy$edge[, 2] %in% phy$edge[, 1]);
	## get the old No. of the nodes and tips that become tips:
	oldNo.ofNewTips <- phy$edge[TERMS, 2];
	n <- length(oldNo.ofNewTips);
	phy$edge[TERMS, 2] <- rank(phy$edge[TERMS, 2]);

	# need new tip labels for internal nodes that become terminals
	# this will use node.label if present, otherwise will be NewTip*
	node2tip <- oldNo.ofNewTips[oldNo.ofNewTips > Ntip];
	new.tip.label <-
	if (!length(node2tip)) {
	character(0);
	} else {
	if (!hasNL) {
	paste0("NewTip", seq(1:length(node2tip)));
	} else {
	phy$node.label[node2tip - Ntip];
	}
	}
	phy$tip.label <- c(phy$tip.label, new.tip.label);

	# get rid of node labels that will not exist in new tree
	if (!is.null(phy$node.label)) {
	phy$node.label <- phy$node.label[unique(phy$edge[,1]) - Ntip];
	}

	# reindex everything
	phy$Nnode <- dim(phy$edge)[1] - n + 1L;
	newNb <- integer(Ntip + Nnode);
	newNb[NEWROOT] <- n + 1L;
	sndcol <- phy$edge[, 2] > n;
	newNb[sort(phy$edge[sndcol, 2])] <- (n + 2):(n + phy$Nnode);
	phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]];
	phy$edge[, 1] <- newNb[phy$edge[, 1]];
	return(phy);
	}

	#############################################################
	## example data

	# test tree string
	tstr <- "(((((f:0.1,g:0.1):0.9,e:1.0):0.3,d:1.3):0.4,c:1.7):1.3,b:3.0);";
	# tree with node labels (purposely left some blank for testing)
	tstr2 <- "(((((f:0.1,g:0.1):0.9,e:1.0)h:0.3,d:1.3)i:0.4,c:1.7)a:1.3,b:3.0);";

	phy <- read.tree(text=tstr);
	phy2 <- read.tree(text=tstr2);
	plot(phy); axisPhylo(); nodelabels();

	# objective: keep b, c, and 9 (making this a terminal)

	#############################################################
	## example 1: tree with no existing node labels

	# get the node ids for the terminal you want to keep
	nds <- match(c("b", "c"), phy$tip.label);
	# add in the internal nodes to keep. you can find these indices with getMRCA
	nds <- c(nds, getMRCA(phy, c("d", "g")));

	# tree with no node labels
	fie <- keep.nodes(phy, nds);
	plot(fie); axisPhylo();

	#############################################################
	## example 2: tree HAS existing node labels ##

	# label version
	nds2 <- c("b", "c", "i");

	# now, one with node labels
	fie2 <- keep.nodes(phy2, nds);
	plot(fie2); axisPhylo();

	#############################################################
	# finally, if your tree does not have node labels, but you do not want
	# to mess around with MRCAs, give it node labels
	phy3 <- phy;
	phy3$node.label <- paste0("Node", seq(1:phy3$Nnode));
	# plot the tree to see labels
	plot(phy3); axisPhylo(); nodelabels(text=phy3$node.label);
	# let's do this
	fie3 <- keep.nodes(phy3, c("b", "c", "Node3"));
	plot(fie3); axisPhylo();