darmitage/ChaoPD

## ChaoPD
require(picante)
require(gtools)
require(reshape)


###########################################################
###              Utility functions                      ###
###########################################################

internal2tips.self = function (phy, int.node){
  #require(picante); require(ape)
  Ntaxa = length(phy$tip.label)
  Nnode = phy$Nnode
  if ((Ntaxa + Nnode - 1) != nrow(phy$edge)) {
    print("tree structure error")
    break
  }
  if (mode(int.node) == "character"){
    nodes = which(phy$node.label == int.node) + Ntaxa
  }else nodes = int.node
  tips = c()
  repeat {
    nodes = phy$edge[which(phy$edge[, 1] %in% nodes), 2]
    if (length(nodes) == 0)
      break
    tips = c(tips, nodes)
  }
  tips = tips[tips <= Ntaxa]
  if( int.node <= Ntaxa & length(tips) == 0 ){
    tips = int.node
  }
  tips = phy$tip.label[tips]
  return(tips)
}

nodes2root <-
  function(phy,int.node) {
    Ntaxa = length(phy$tip.label) + phy$Nnode
    Nnode = phy$Nnode
    tips = c()
    nodes = int.node
    repeat {
      nodes = phy$edge[which(phy$edge[,2]%in%nodes),1]
      if (length(nodes)==0) break
      tips = c(tips,nodes)
    }

    tips = tips[tips<=Ntaxa]
    return(c(rev(tips),int.node))
  }


###########################################################
###   Calculates phylogenetic diversity profiles        ###
###########################################################

ChaoD = function(comm, phy){
#Begin
    phy1 <- prune.sample(comm, phy)
    samp1 <- t(comm)
    samp1 <- samp1[mixedsort(rownames(samp1)),]
    taxa = nrow(samp1)
    rownames(samp1) <- rep(1:taxa)
    sums <- subset(rowSums(samp1), rowSums(samp1) > 0)
    samp1 <- samp1[names(sums),]
    samples = ncol(samp1)
    taxa = nrow(samp1)
    phy1$tip.label <- rep(1:length(phy1$tip.label))
    phy1 = prune.sample(t(samp1),phy1)
    Bnode = taxa + 1

    #Measures overall root-node length for each taxon
    Li = t(t(dist.nodes(phy1)[1:taxa, Bnode]))

    #Pull individual branch lengths
    Lb <- cbind(phy1$edge[,2], phy1$edge.length)
    Lb <- rbind(Lb, c(Bnode,0))
    Lb <- Lb[order(Lb[,1]),]

    #Build taxa abundance matrix
    p=matrix(0,taxa,samples)

    for (k in 1:samples){
      p[,k]<-samp1[,k]/sum(samp1[,k])
    }

    #Compute T, the total average branch length for a community
    That=matrix(0,1,samples)
    for (k in 1:samples){
      That[,k] <-sum(Li * p[,k])
    }

    #Compute Pb, the relative abundances of branches and Lbk, the matrix of branches/samples
    Pb <- matrix(NA, length(Lb[,2]), ncol = samples)
    Lbk <- matrix(NA, length(Lb[,2]), ncol = samples)

      for (k in 1:samples){
        for(i in 1:length(Lb[,1])){
          branch <- internal2tips.self(phy1,Lb[i,1])
          j = Lb[i,1]
          Pb[j,k] <- sum(p[branch,k])
        }}

    #Compute Lb, matrix of lengths of branches present in a community
      for (k in 1:samples){
        Sk <- as.numeric(which(samp1[,k] != 0))
        for(i in 1:length(Lbk[,1])) {
          Lbk[i,k] <- ifelse(Lb[i,1] %in% nodes2root(phy1, Sk), Lb[i,2], 0)
        }}

    #Initialize q values
    lenq = 50
    qq <- seq(length=lenq, from=0, by=.11)

    # Initialise the Diversity matrix to zero
    Dqz = matrix(0, lenq ,samples)

      # Loop to calculate the diversity Dqz for each value of q (iq) and each sample (k)

      for (k in 1:samples) {

        for (iq in 1:lenq) {
          q<-qq[iq]
          for (zpi in 1:length(Pb[,k])){
            if (Pb[zpi,k]>0)(
              Dqz[iq,k]<-Dqz[iq,k] + (Lbk[zpi,k]/That[,k]) * Pb[zpi,k]^q)
          }

          Dqz[iq,k] <- Dqz[iq,k]^(1/(1-q));
        }}
results = Dqz
return(results)
}

###########################################################
### ChaoRarefy- diversity profiles rarefied             ###
###########################################################

#This function takes your sample and phylogeny and rarefies to account for differences in sample sizes
# Y is usually the smallest sample in the community = min(rowSums(samp))
# X is the number of sampled communities to average the results over (should be >100)

ChaoRarefy=function (samp,phy,Y,K){
  Res<-matrix(NA,ncol=3,nrow=dim(samp)[1])
  rowsums<-rowSums(samp)
  df.list<-vector("list",K)

  for(a in 1:K){
    print(a)
    samplematrix1<-samp
    rare_sample<-rrarefy(samplematrix1,Y)
    rare_sample = rare_sample[,colSums(rare_sample)>0]
    dqz<-ChaoD(rare_sample,phy)
    df.list[[a]]<-dqz

  } # close for loop


  Res<-Reduce("+",df.list)/length(df.list)


  return(Res)

}


###########################################################
###                  An example                         ###
###########################################################
data(phylocom)
samp = phylocom$sample
phy = phylocom$phylo

ChaoRarefy(samp = samp, phy = phy, Y = min(rowSums(samp)), K = 100)
	require(picante)
	require(gtools)
	require(reshape)


	###########################################################
	### Utility functions ###
	###########################################################

	internal2tips.self = function (phy, int.node){
	#require(picante); require(ape)
	Ntaxa = length(phy$tip.label)
	Nnode = phy$Nnode
	if ((Ntaxa + Nnode - 1) != nrow(phy$edge)) {
	print("tree structure error")
	break
	}
	if (mode(int.node) == "character"){
	nodes = which(phy$node.label == int.node) + Ntaxa
	}else nodes = int.node
	tips = c()
	repeat {
	nodes = phy$edge[which(phy$edge[, 1] %in% nodes), 2]
	if (length(nodes) == 0)
	break
	tips = c(tips, nodes)
	}
	tips = tips[tips <= Ntaxa]
	if( int.node <= Ntaxa & length(tips) == 0 ){
	tips = int.node
	}
	tips = phy$tip.label[tips]
	return(tips)
	}

	nodes2root <-
	function(phy,int.node) {
	Ntaxa = length(phy$tip.label) + phy$Nnode
	Nnode = phy$Nnode
	tips = c()
	nodes = int.node
	repeat {
	nodes = phy$edge[which(phy$edge[,2]%in%nodes),1]
	if (length(nodes)==0) break
	tips = c(tips,nodes)
	}

	tips = tips[tips<=Ntaxa]
	return(c(rev(tips),int.node))
	}


	###########################################################
	### Calculates phylogenetic diversity profiles ###
	###########################################################

	ChaoD = function(comm, phy){
	#Begin
	phy1 <- prune.sample(comm, phy)
	samp1 <- t(comm)
	samp1 <- samp1[mixedsort(rownames(samp1)),]
	taxa = nrow(samp1)
	rownames(samp1) <- rep(1:taxa)
	sums <- subset(rowSums(samp1), rowSums(samp1) > 0)
	samp1 <- samp1[names(sums),]
	samples = ncol(samp1)
	taxa = nrow(samp1)
	phy1$tip.label <- rep(1:length(phy1$tip.label))
	phy1 = prune.sample(t(samp1),phy1)
	Bnode = taxa + 1

	#Measures overall root-node length for each taxon
	Li = t(t(dist.nodes(phy1)[1:taxa, Bnode]))

	#Pull individual branch lengths
	Lb <- cbind(phy1$edge[,2], phy1$edge.length)
	Lb <- rbind(Lb, c(Bnode,0))
	Lb <- Lb[order(Lb[,1]),]

	#Build taxa abundance matrix
	p=matrix(0,taxa,samples)

	for (k in 1:samples){
	p[,k]<-samp1[,k]/sum(samp1[,k])
	}

	#Compute T, the total average branch length for a community
	That=matrix(0,1,samples)
	for (k in 1:samples){
	That[,k] <-sum(Li * p[,k])
	}

	#Compute Pb, the relative abundances of branches and Lbk, the matrix of branches/samples
	Pb <- matrix(NA, length(Lb[,2]), ncol = samples)
	Lbk <- matrix(NA, length(Lb[,2]), ncol = samples)

	for (k in 1:samples){
	for(i in 1:length(Lb[,1])){
	branch <- internal2tips.self(phy1,Lb[i,1])
	j = Lb[i,1]
	Pb[j,k] <- sum(p[branch,k])
	}}

	#Compute Lb, matrix of lengths of branches present in a community
	for (k in 1:samples){
	Sk <- as.numeric(which(samp1[,k] != 0))
	for(i in 1:length(Lbk[,1])) {
	Lbk[i,k] <- ifelse(Lb[i,1] %in% nodes2root(phy1, Sk), Lb[i,2], 0)
	}}

	#Initialize q values
	lenq = 50
	qq <- seq(length=lenq, from=0, by=.11)

	# Initialise the Diversity matrix to zero
	Dqz = matrix(0, lenq ,samples)

	# Loop to calculate the diversity Dqz for each value of q (iq) and each sample (k)

	for (k in 1:samples) {

	for (iq in 1:lenq) {
	q<-qq[iq]
	for (zpi in 1:length(Pb[,k])){
	if (Pb[zpi,k]>0)(
	Dqz[iq,k]<-Dqz[iq,k] + (Lbk[zpi,k]/That[,k]) * Pb[zpi,k]^q)
	}

	Dqz[iq,k] <- Dqz[iq,k]^(1/(1-q));
	}}
	results = Dqz
	return(results)
	}

	###########################################################
	### ChaoRarefy- diversity profiles rarefied ###
	###########################################################

	#This function takes your sample and phylogeny and rarefies to account for differences in sample sizes
	# Y is usually the smallest sample in the community = min(rowSums(samp))
	# X is the number of sampled communities to average the results over (should be >100)

	ChaoRarefy=function (samp,phy,Y,K){
	Res<-matrix(NA,ncol=3,nrow=dim(samp)[1])
	rowsums<-rowSums(samp)
	df.list<-vector("list",K)

	for(a in 1:K){
	print(a)
	samplematrix1<-samp
	rare_sample<-rrarefy(samplematrix1,Y)
	rare_sample = rare_sample[,colSums(rare_sample)>0]
	dqz<-ChaoD(rare_sample,phy)
	df.list[[a]]<-dqz

	} # close for loop



	Res<-Reduce("+",df.list)/length(df.list)


	return(Res)

	}


	###########################################################
	### An example ###
	###########################################################
	data(phylocom)
	samp = phylocom$sample
	phy = phylocom$phylo

	ChaoRarefy(samp = samp, phy = phy, Y = min(rowSums(samp)), K = 100)