# This approach to assessing multifunctionality is based on the idea that the sites
# best at providing multiple functions will have not only a high mean value across
# functions (approach 3 in Byrnes et al.) but also low variability in the level of
# function delivered across functions (i.e. the coefficient of variation).
#I use Byrnes' multifunc package to illustrate it.
library(devtools)
install_github("jebyrnes/multifunc") #development version from GitHub
library(multifunc)
library(plyr)     #for colwise(), used below
library(reshape2) #for melt(), used below
library(ggplot2)
#load data and clean selected variables as per their example
data(all_biodepth)
allVars<-qw(biomassY3, root3, N.g.m2, light3, N.Soil, wood3, cotton3)
germany<-subset(all_biodepth, all_biodepth$location=="Germany")
vars<-whichVars(germany, allVars)
#re-normalize N.Soil so that everything is on the same sign-scale (i.e. the maximum level of a function is the "best" level)
germany$N.Soil<- -1*germany$N.Soil +max(germany$N.Soil, na.rm=TRUE)
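#quick sanity check: after the reflection the site with the most soil N scores 0
#and lower soil N scores higher, so the maximum of every function is now the "best" value
range(germany$N.Soil, na.rm = TRUE) #minimum is now 0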
#Average method from the multifunc package (approach 3 in Byrnes et al.)
germany<-cbind(germany, getStdAndMeanFunctions(germany, vars))
head(germany)
ggplot(aes(x = Diversity, y = meanFunction), data = germany) +
  geom_point(size = 3) +
  theme_bw(base_size = 15) +
  stat_smooth(method = "lm", colour = "black", size = 2)
#modify Byrnes' function to also calculate the CV
#first I load two functions needed later on
#coefficient of variation
coef_var <- function(x, na.rm = TRUE) {
  ifelse(sd(x, na.rm = na.rm) == 0 & mean(x, na.rm = na.rm) == 0, 0,
         sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm))
}
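#quick sanity check on made-up values: even delivery across functions gives a CV
#of 0, uneven delivery a high CV
coef_var(c(0.8, 0.8, 0.8)) # 0
coef_var(c(1.0, 0.1, 0.5)) # ~0.85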
#and the distance from a point to a line borrowed from:
## Credits:
## Theory by Paul Bourke http://local.wasp.uwa.edu.au/~pbourke/geometry/pointline/
## Based in part on C code by Damian Coventry Tuesday, 16 July 2002
## Based on VBA code by Brandon Crosby 9-6-05 (2 dimensions)
## With grateful thanks for answering our needs!
## This is an R (http://www.r-project.org) implementation by Gregoire Thomas 7/11/08
## I.Bartomeus added the intersection Point to the Output
distancePointLine <- function(x, y, slope, intercept) {
  ## x, y is the point to test.
  ## slope, intercept define the line to check the distance to.
  ##
  ## Returns the distance from the line.
  ##
  ## Returns 9999 on 0 denominator conditions.
  x1 <- x - 10
  x2 <- x + 10
  y1 <- x1 * slope + intercept
  y2 <- x2 * slope + intercept
  distancePointSegment(x, y, x1, y1, x2, y2)
}
distancePointSegment <- function(px, py, x1, y1, x2, y2, return_point = TRUE) {
  ## px,py is the point to test.
  ## x1,y1,x2,y2 is the segment to check the distance to.
  ##
  ## Returns the distance from the line or, if the point on the line nearest the
  ## tested point falls outside the endpoints of the segment, the distance to the
  ## nearest endpoint.
  ##
  ## Returns 9999 on 0 denominator conditions.
  lineMagnitude <- function(x1, y1, x2, y2) sqrt((x2 - x1)^2 + (y2 - y1)^2)
  ix <- iy <- 0 # intersecting point
  lineMag <- lineMagnitude(x1, y1, x2, y2)
  if (lineMag < 0.00000001) {
    warning("short segment")
    return(9999)
  }
  u <- (((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1)))
  u <- u / (lineMag * lineMag)
  if ((u < 0.00001) || (u > 1)) {
    ## closest point does not fall within the segment:
    ## take the shorter distance to an endpoint, and return that endpoint
    d1 <- lineMagnitude(px, py, x1, y1)
    d2 <- lineMagnitude(px, py, x2, y2)
    if (d1 > d2) {
      ans <- d2
      ix <- x2
      iy <- y2
    } else {
      ans <- d1
      ix <- x1
      iy <- y1
    }
  } else {
    ## intersecting point is on the segment, use the formula
    ix <- x1 + u * (x2 - x1)
    iy <- y1 + u * (y2 - y1)
    ans <- lineMagnitude(px, py, ix, iy)
  }
  if (return_point == TRUE) {
    Out <- c(ix = ix, iy = iy, ans = ans)
  } else {
    Out <- ans
  }
  Out
}
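#quick illustration with made-up numbers: projecting the point (0.2, 0.5) onto the
#line y = -x + 1 (the line used below) should give the nearest point (0.35, 0.65)
#at a distance of sqrt(2 * 0.15^2), about 0.21
distancePointLine(x = 0.2, y = 0.5, slope = -1, intercept = 1)
# ix   iy   ans
# 0.35 0.65 0.21...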
#now I tweak Byrnes' function to also get the CV and calculate a mean function modified by the CV
getMeanCV <- function(data, vars, standardizeFunction = standardizeUnitScale) {
  ret <- colwise(standardizeFunction)(data[, which(names(data) %in% vars)])
  names(ret) <- paste(names(ret), ".std", sep = "")
  ret$meanFunction <- rowSums(ret) / ncol(ret)
  #get the CV across the standardized functions (excluding the mean function itself)
  ret$CVFunction <- apply(ret[, names(ret) != "meanFunction"], MARGIN = 1, coef_var)
  #see the relationship between mean and CV
  plot(ret$meanFunction ~ ret$CVFunction, xlim = c(0, 1), ylim = c(0, 1))
  abline(coef = c(1, -1)) #both axes need to be constrained between 0 and 1
  #my rationale is that the best sites will be in the upper left corner,
  #so let's rank them along the 45º line
  #first we find, for each data point, its closest point on the drawn line
  p <- matrix(ncol = 3, nrow = nrow(ret))
  for (i in 1:nrow(ret)) {
    p[i, ] <- distancePointLine(x = ret$CVFunction[i], y = ret$meanFunction[i],
                                slope = -1, intercept = 1)
  }
  colnames(p) <- c("x", "y", "distToLine")
  ret <- cbind(ret, p[, c(1, 2)])
  #plot the projected points
  points(ret$x, ret$y, col = "red")
  #then I calculate the distance from each projected point p to the worst point
  #(i.e. the site with the maximum CV and the minimum mean function)
  minimum <- c(max(ret$CVFunction), min(ret$meanFunction))
  #to avoid a case with 0 function, maybe it is better to use c(1, 0) as the minimum?
  #minimum <- c(1, 0)
  dis <- c()
  for (i in 1:nrow(ret)) {
    point_ <- c(ret$x[i], ret$y[i])
    dis[i] <- sqrt(sum((point_ - minimum)^2))
  }
  ret$MeanCV <- dis #store the new mean modified by the CV
  #x and y could be removed from ret before exiting;
  #the plot could also be removed, but it helps visually
  return(ret)
}
#now a site with a moderate mean function but a low CV (i.e. most functions are fulfilled)
#is ranked better than a site with the same mean function
#but with high values for some functions and low values for others.
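#a toy illustration with made-up values: two sites with the same mean function,
#one delivering all functions evenly, the other unevenly; the even site gets the
#lower CV and so lands closer to the "best" upper-left corner of the plot above
toy <- data.frame(f1 = c(0.6, 1.0), f2 = c(0.6, 0.2), f3 = c(0.6, 0.6))
rowMeans(toy)           #both sites: 0.6
apply(toy, 1, coef_var) #even site: 0; uneven site: ~0.67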
#Use this method in the germany example
germany2 <-cbind(germany, getMeanCV(germany, vars))
head(germany2)
#see how the calculated measures correlate
plot(germany2$meanFunction ~ germany2$MeanCV)
plot(germany2$CVFunction ~ germany2$MeanCV)
#see the result
ggplot(aes(x = Diversity, y = MeanCV), data = germany2) +
  geom_point(size = 3) +
  theme_bw(base_size = 15) +
  stat_smooth(method = "lm", colour = "black", size = 2)
#To visualize all functions
colnames(germany2)
germanymelted <- melt(germany2[, c(8, 129:134, 141, 144)], id.vars = "Diversity") #column indices picked by inspecting colnames(germany2) above
ggplot(aes(x = Diversity, y = value), data = germanymelted) +
  geom_point(size = 3) +
  facet_grid(~variable) +
  theme_bw(base_size = 15) +
  stat_smooth(method = "lm", colour = "black", size = 2) +
  xlab("\nSpecies Richness") +
  ylab("Standardized value of function\n")
#Note that the modified mean value is artificial, so the values themselves should not
#be compared at face value; only the slopes should be.
#In this case it doesn't change the output much, I guess because most functions are
#correlated with each other?
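#one way to check that guess (a sketch, using the raw functions selected above):
round(cor(germany[, vars], use = "pairwise.complete.obs"), 2)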