ibartomeus/multifunc2

## 163 changes: 163 additions & 0 deletions multifunc2
@@ -0,0 +1,163 @@

    # This approach to assessing multifunctionality is based on the idea that sites
    # that best provide multiple functions will have not only a high mean value across
    # functions (approach 3 in Byrnes et al.) but also low variability in the level
    # delivered across functions (i.e. a low coefficient of variation).
    #
    # I use Byrnes' multifunc package to illustrate it.

    # Load the packages used throughout the script. multifunc is installed from
    # GitHub only when it is missing, so re-running the script does not
    # reinstall it every time.
    library(devtools)
    if (!requireNamespace("multifunc", quietly = TRUE)) {
      # install_github() takes the repository as one "user/repo" string; in the
      # current devtools/remotes signature the second positional argument is
      # `ref`, so the old form install_github("multifunc", "jebyrnes") no longer
      # points at jebyrnes' repository.
      install_github("jebyrnes/multifunc")
    }
    library(multifunc)
    library(ggplot2)

    # Load data and clean selected variables as per the multifunc example
    data(all_biodepth)
    allVars <- qw(biomassY3, root3, N.g.m2, light3, N.Soil, wood3, cotton3)
    germany <- subset(all_biodepth, all_biodepth$location == "Germany")
    vars <- whichVars(germany, allVars)

    # Re-normalize N.Soil so that everything is on the same sign-scale
    # (i.e. the maximum level of a function is the "best" function)
    germany$N.Soil <- -1 * germany$N.Soil + max(germany$N.Soil, na.rm = TRUE)

    # Average method from the multifunc package (approach 3 in Byrnes et al.)
    germany <- cbind(germany, getStdAndMeanFunctions(germany, vars))
    head(germany)
    ggplot(aes(x = Diversity, y = meanFunction), data = germany) +
      geom_point(size = 3) +
      theme_bw(base_size = 15) +
      stat_smooth(method = "lm", colour = "black", size = 2)


    # Modified Byrnes function that also calculates the CV.
    # First I load two functions needed later on.

    # Coefficient of variation

    #' Coefficient of variation (sd / mean) of a numeric vector
    #'
    #' @param x Numeric vector.
    #' @param na.rm Passed on to `sd()` and `mean()`: drop `NA`s first?
    #' @return A single double. 0 when both the standard deviation and the mean
    #'   are 0 (instead of the 0 / 0 that would give `NaN`), otherwise
    #'   `sd / mean`. `NA` when the standard deviation or the mean is missing
    #'   (e.g. fewer than two non-missing values).
    coef_var <- function(x, na.rm = TRUE) {
      # Compute each summary once instead of once per ifelse() branch.
      spread <- sd(x, na.rm = na.rm)
      centre <- mean(x, na.rm = na.rm)
      # Both values are scalars, so use a plain `if` rather than ifelse().
      # isTRUE() turns an NA comparison into FALSE so that the NA propagates
      # through the division below as a double.
      if (isTRUE(spread == 0 && centre == 0)) {
        return(0)
      }
      spread / centre
    }


    # ...and the distance from a point to a line, borrowed from:

    ##  Credits:
##  Credits:

    ##  Theory by Paul Bourke http://local.wasp.uwa.edu.au/~pbourke/geometry/pointline/
##  Theory by Paul Bourke http://local.wasp.uwa.edu.au/~pbourke/geometry/pointline/

    ##  Based in part on C code by Damian Coventry Tuesday, 16 July 2002
##  Based in part on C code by Damian Coventry Tuesday, 16 July 2002

    ##  Based on VBA code by Brandon Crosby 9-6-05 (2 dimensions)
##  Based on VBA code by Brandon Crosby 9-6-05 (2 dimensions)

    ##  With grateful thanks for answering our needs!
##  With grateful thanks for answering our needs!

    ##  This is an R (http://www.r-project.org) implementation by Gregoire Thomas 7/11/08
##  This is an R (http://www.r-project.org) implementation by Gregoire Thomas 7/11/08

    ##  I.Bartomeus added the intersection Point to the Output
##  I.Bartomeus added the intersection Point to the Output

    #' Perpendicular distance from a point to the line y = slope * x + intercept
    #'
    #' The foot of the perpendicular is computed in closed form. Approximating
    #' the line by a finite segment spanning x - 10 to x + 10 gives wrong
    #' answers whenever the foot falls outside that span (steep lines or
    #' points far from the line).
    #'
    #' @param x,y Coordinates of the point to test (scalars).
    #' @param slope,intercept Slope and intercept of the line. Vertical lines
    #'   cannot be expressed in this form.
    #' @return Named numeric vector `c(ix, iy, ans)`: `ix`, `iy` are the
    #'   coordinates of the closest point on the line and `ans` is the distance
    #'   from (`x`, `y`) to it.
    distancePointLine <- function(x, y, slope, intercept) {
      # Squared length of the line's direction vector (1, slope); never zero.
      norm2 <- 1 + slope^2
      # Orthogonal projection of (x, y) onto the line.
      ix <- (x + slope * (y - intercept)) / norm2
      iy <- slope * ix + intercept
      # Standard point-line distance for the line slope * x - y + intercept = 0.
      ans <- abs(slope * x - y + intercept) / sqrt(norm2)
      c(ix = ix, iy = iy, ans = ans)
    }

    #' Distance from a point to a line segment
    #'
    #' @param px,py Coordinates of the point to test.
    #' @param x1,y1,x2,y2 End points of the segment.
    #' @param return_point If `TRUE` (default) also return the closest point on
    #'   the segment.
    #' @return With `return_point = TRUE`, a named numeric vector
    #'   `c(ix, iy, ans)` where (`ix`, `iy`) is the point of the segment closest
    #'   to (`px`, `py`) and `ans` is the distance to it; otherwise just `ans`.
    #'   When the perpendicular foot falls outside the segment, the closest
    #'   point is the nearer end point. For a (near) zero-length segment a
    #'   warning is raised and the bare sentinel value 9999 is returned,
    #'   regardless of `return_point`.
    distancePointSegment <- function(px, py, x1, y1, x2, y2, return_point = TRUE) {
      lineMagnitude <- function(xa, ya, xb, yb) sqrt((xb - xa)^2 + (yb - ya)^2)

      lineMag <- lineMagnitude(x1, y1, x2, y2)
      if (lineMag < 0.00000001) {
        warning("short segment")
        return(9999)
      }

      # Position of the perpendicular foot along the segment:
      # 0 at (x1, y1), 1 at (x2, y2).
      u <- ((px - x1) * (x2 - x1) + (py - y1) * (y2 - y1)) / (lineMag * lineMag)

      if ((u < 0.00001) || (u > 1)) {
        # Closest point does not fall within the segment: use the nearer end
        # point. The end-point distances get their own variables so that
        # ix / iy always hold coordinates, never distances.
        dist_start <- lineMagnitude(px, py, x1, y1)
        dist_end <- lineMagnitude(px, py, x2, y2)
        if (dist_start > dist_end) {
          ix <- x2
          iy <- y2
          ans <- dist_end
        } else {
          ix <- x1
          iy <- y1
          ans <- dist_start
        }
      } else {
        # Foot of the perpendicular lies on the segment.
        ix <- x1 + u * (x2 - x1)
        iy <- y1 + u * (y2 - y1)
        ans <- lineMagnitude(px, py, ix, iy)
      }

      if (return_point) {
        c(ix = ix, iy = iy, ans = ans)
      } else {
        ans
      }
    }


    # Now I tweak Byrnes' function so that it also returns the CV and a mean function modified by the CV.

    #' Mean multifunctionality modified by the variability across functions
    #'
    #' Standardizes the columns of `data` named in `vars` and computes, for
    #' every row, the mean and the coefficient of variation (CV) of the
    #' standardized functions, the closest point on the line mean = 1 - CV, and
    #' the distance from the row's (CV, mean) point to the worst observed
    #' corner (highest CV, lowest mean). Rationale: the best sites sit in the
    #' upper-left corner of the mean-vs-CV plot (high mean, low CV).
    #'
    #' Relies on `coef_var()` and `distancePointLine()` defined in this file.
    #'
    #' @param data Data frame with one row per site.
    #' @param vars Character vector naming the columns of `data` to use.
    #' @param standardizeFunction Function applied to each selected column.
    #' @param show_plot Draw the diagnostic mean-vs-CV plot on the active
    #'   graphics device? Defaults to `TRUE`.
    #' @return Data frame with the standardized columns (suffix `.std`) followed
    #'   by `meanFunction`, `CVFunction`, `x`, `y` (closest point on the line)
    #'   and `MeanCV` (the mean modified by the CV).
    getMeanCV <- function(data, vars, standardizeFunction = standardizeUnitScale,
                          show_plot = TRUE) {
      # drop = FALSE keeps a data frame even when a single column matches.
      std <- data[, names(data) %in% vars, drop = FALSE]
      if (ncol(std) == 0) {
        stop("none of `vars` is a column of `data`", call. = FALSE)
      }
      std[] <- lapply(std, standardizeFunction)
      names(std) <- paste0(names(std), ".std")
      n_sites <- nrow(std)

      mean_fun <- rowSums(std) / ncol(std)
      # The CV is taken over the standardized functions only. Computing it
      # after meanFunction has been added to the frame would include the mean
      # as an extra observation and deflate the CV.
      std_mat <- as.matrix(std)
      cv_fun <- numeric(n_sites)
      for (i in seq_len(n_sites)) {
        cv_fun[i] <- coef_var(std_mat[i, ])
      }

      ret <- std
      ret$meanFunction <- mean_fun
      ret$CVFunction <- cv_fun

      # For each site find the closest point on the line mean = 1 - CV.
      # Rows with a missing coordinate are left as NA.
      foot <- matrix(NA_real_, nrow = n_sites, ncol = 3,
                     dimnames = list(NULL, c("x", "y", "distToLine")))
      complete <- which(!is.na(ret$CVFunction) & !is.na(ret$meanFunction))
      for (i in complete) {
        foot[i, ] <- distancePointLine(x = ret$CVFunction[i],
                                       y = ret$meanFunction[i],
                                       slope = -1, intercept = 1)
      }
      # Assign by column so a one-row input is not collapsed to a vector.
      ret$x <- foot[, "x"]
      ret$y <- foot[, "y"]

      if (show_plot) {
        # See the relationship between mean and CV; both axes are drawn on 0-1.
        plot(ret$meanFunction ~ ret$CVFunction, xlim = c(0, 1), ylim = c(0, 1))
        abline(coef = c(1, -1))
        # Projected points on the line
        points(ret$x, ret$y, col = "red")
      }

      # Worst point: the highest CV (x) combined with the lowest mean (y).
      # na.rm = TRUE so one incomplete site does not turn every distance into NA.
      # To avoid depending on the observed range, c(1, 0) could be used instead.
      worst <- c(max(ret$CVFunction, na.rm = TRUE),
                 min(ret$meanFunction, na.rm = TRUE))
      # NOTE(review): the distance is measured from the raw (CV, mean) points,
      # as in the original code, not from the projected (x, y) points that the
      # original comments mention -- confirm which one is intended.
      ret$MeanCV <- sqrt((ret$CVFunction - worst[1])^2 +
                           (ret$meanFunction - worst[2])^2)

      # x and y could be removed from ret before returning.
      ret
    }

    # Now a site with a moderate mean function but low CV (i.e. most functions are fulfilled)
    # is ranked better than a site with the same mean function
    # but with high values for some functions and low values for others.


    #Use this method in the germany example
#Use this method in the germany example

    mean_cv <- getMeanCV(germany, vars)
    # germany already carries the .std columns and meanFunction (added earlier
    # by getStdAndMeanFunctions), so append only the columns that are new;
    # this avoids duplicated column names in germany2.
    new_cols <- setdiff(names(mean_cv), names(germany))
    germany2 <- cbind(germany, mean_cv[, new_cols, drop = FALSE])
    head(germany2)

    # See how the calculated measures correlate
    plot(germany2$meanFunction ~ germany2$MeanCV)
    plot(germany2$CVFunction ~ germany2$MeanCV)

    # See the result
    ggplot(aes(x = Diversity, y = MeanCV), data = germany2) +
      geom_point(size = 3) +
      theme_bw(base_size = 15) +
      stat_smooth(method = "lm", colour = "black", size = 2)

    # To visualize all functions
    colnames(germany2)
    # Columns are selected by name instead of by position.
    # NOTE(review): these names replace the positional indices
    # c(8, 129:134, 141, 144); presumably that was Diversity, the standardized
    # functions, meanFunction, CVFunction and MeanCV -- verify against the
    # colnames() output above.
    std_cols <- intersect(names(germany2), paste0(vars, ".std"))
    plot_cols <- c("Diversity", std_cols, "meanFunction", "CVFunction", "MeanCV")
    germanymelted <- melt(germany2[, plot_cols], id.vars = "Diversity")

    ggplot(aes(x = Diversity, y = value), data = germanymelted) +
      geom_point(size = 3) +
      facet_grid(~variable) +
      theme_bw(base_size = 15) +
      stat_smooth(method = "lm", colour = "black", size = 2) +
      xlab("\nSpecies Richness") +
      ylab("Standardized value of function\n")


    # Note that the modified mean value is artificial, so face values should not be compared, only slopes.
    # In this case it doesn't change the output much, I guess because most functions are correlated with each other?