Skip to content

Instantly share code, notes, and snippets.

@ibartomeus
Created November 1, 2013 09:31

Revisions

  1. ibartomeus created this gist Nov 1, 2013.
    163 changes: 163 additions & 0 deletions multifunc2
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,163 @@
    # This approach to assess multifunctionality is based in the idea that sites providing
    # best multiple functions will have not only a high mean value across function
    # (approach 3 in Byrnes et al.) but also low variability in the function delivered
    # across functions (i.e. Coef of var).

    #I use Byrnes multifunc package to ilustrate it.
    library(devtools)
    install_github("multifunc", "jebyrnes")
    library(multifunc)
    library(ggplot2)

    #load data and clean selected variables as per their example
    data(all_biodepth)
    allVars<-qw(biomassY3, root3, N.g.m2, light3, N.Soil, wood3, cotton3)
    germany<-subset(all_biodepth, all_biodepth$location=="Germany")
    vars<-whichVars(germany, allVars)

    #re-normalize N.Soil so that everything is on the same sign-scale (e.g. the maximum level of a function is the "best" function)
    germany$N.Soil<- -1*germany$N.Soil +max(germany$N.Soil, na.rm=TRUE)

    #Average method from multifunc package (aproach 3 in Byrnes et al.)
    germany<-cbind(germany, getStdAndMeanFunctions(germany, vars))
    head(germany)
    ggplot(aes(x=Diversity, y=meanFunction),data=germany)+geom_point(size=3)+
    theme_bw(base_size=15)+
    stat_smooth(method="lm", colour="black", size=2)


    #modified Byrnes function to calculate also CV
    #first I load two functions need later on

    #coeficient of variation
    coef_var <- function(x, na.rm = TRUE) {
    ifelse(sd(x, na.rm = na.rm) == 0 & mean(x, na.rm = na.rm) == 0, 0,
    (sd(x, na.rm = na.rm)/mean(x, na.rm = na.rm)))}

    #and the distance from a point to a line borrowed from:
    ## Credits:
    ## Theory by Paul Bourke http://local.wasp.uwa.edu.au/~pbourke/geometry/pointline/
    ## Based in part on C code by Damian Coventry Tuesday, 16 July 2002
    ## Based on VBA code by Brandon Crosby 9-6-05 (2 dimensions)
    ## With grateful thanks for answering our needs!
    ## This is an R (http://www.r-project.org) implementation by Gregoire Thomas 7/11/08
    ## I.Bartomeus added the intersection Point to the Output
    distancePointLine <- function(x, y, slope, intercept) {
    ## x, y is the point to test.
    ## slope, intercept is the line to check distance.
    ##
    ## Returns distance from the line.
    ##
    ## Returns 9999 on 0 denominator conditions.
    x1 <- x-10
    x2 <- x+10
    y1 <- x1*slope+intercept
    y2 <- x2*slope+intercept
    distancePointSegment(x,y, x1,y1, x2,y2)
    }
    distancePointSegment <- function(px, py, x1, y1, x2, y2, return_point = TRUE) {
    ## px,py is the point to test.
    ## x1,y1,x2,y2 is the line to check distance.
    ##
    ## Returns distance from the line, or if the intersecting point on the line nearest
    ## the point tested is outside the endpoints of the line, the distance to the
    ## nearest endpoint.
    ##
    ## Returns 9999 on 0 denominator conditions.
    lineMagnitude <- function(x1, y1, x2, y2) sqrt((x2-x1)^2+(y2-y1)^2)
    ans <- NULL
    ix <- iy <- 0 # intersecting point
    lineMag <- lineMagnitude(x1, y1, x2, y2)
    if( lineMag < 0.00000001) {
    warning("short segment")
    return(9999)
    }
    u <- (((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1)))
    u <- u / (lineMag * lineMag)
    if((u < 0.00001) || (u > 1)) {
    ## closest point does not fall within the line segment, take the shorter distance
    ## to an endpoint
    ix <- lineMagnitude(px, py, x1, y1)
    iy <- lineMagnitude(px, py, x2, y2)
    if(ix > iy) ans <- iy
    else ans <- ix
    } else {
    ## Intersecting point is on the line, use the formula
    ix <- x1 + u * (x2 - x1)
    iy <- y1 + u * (y2 - y1)
    ans <- lineMagnitude(px, py, ix, iy)
    }
    if(return_point == TRUE){
    Out <- c(ix = ix, iy=iy, ans=ans)
    }else{
    Out <- ans
    }
    Out
    }

    #now I tweak byrnes function to get also the CV and calculate a mean function modified by CV
    getMeanCV <- function(data, vars, standardizeFunction=standardizeUnitScale){
    ret<-colwise(standardizeFunction)(data[,which(names(data) %in% vars)])

    names(ret)<-paste(names(ret), ".std", sep="")
    ret$meanFunction<-rowSums(ret)/ncol(ret)
    # get CV
    ret$CVFunction<- apply(ret, MARGIN= 1, coef_var)
    # See relationship between mean and CV
    plot(ret$meanFunction ~ ret$CVFunction, xlim = c(0,1), ylim = c(0,1))
    abline(coef= c(1, -1)) #need to be sure is constrained between 0-1.
    # My rationale is that best sites will be in the upper left corner,
    # let's rank them along the 45º line, then.
    # first we find for each data point its closest point situated on the drawn line
    p<- matrix(ncol = 3, nrow = nrow(ret))
    for(i in 1:nrow(ret)){
    p[i,] <- distancePointLine(x= ret$CVFunction[i], y = ret$meanFunction[i],
    slope = -1, intercept = 1)
    }
    colnames(p) <- c("x","y","distToLine")
    ret <- cbind(ret,p[,c(1,2)])
    #plot the new points
    points(ret$x,ret$y, col = "red")
    #then I calculate its relative distances from each new point p to the worst point (i.e.site with max Y min X)
    minimum <- c(max(ret$CVFunction), min(ret$meanFunction))
    #to avoid a case with 0 function maybe is better to use as min c(0,1)?
    #minimum <- c(1,0)
    dis <- c()
    for(i in 1:nrow(ret)){
    points_ <- c(ret$CVFunction[i],ret$meanFunction[i])
    dis[i] <- sqrt(sum((points_ - minimum) ^ 2))
    }
    ret$MeanCV <- dis #store the new mean modified by CV
    #x and y can be removed from ret before exiting.
    #plot can also be removed, but helps visually
    return(ret)
    }
    #now, a site with a moderate mean function, but low CV (i.e. most functions are fullfiled)
    #is ranked better than a site with the same mean function,
    #but with high values for some functions and low for others.

    #Use this method in the germany example
    germany2 <-cbind(germany, getMeanCV(germany, vars))
    head(germany2)

    #see how the calculated measures correlate
    plot(germany2$meanFunction ~ germany2$MeanCV)
    plot(germany2$CVFunction ~ germany2$MeanCV)

    #See the reslut
    ggplot(aes(x=Diversity, y=MeanCV),data=germany2)+geom_point(size=3)+
    theme_bw(base_size=15)+
    stat_smooth(method="lm", colour="black", size=2)

    #To visualize all functions
    colnames(germany2)
    germanymelted <- melt(germany2[,c(8,129:134,141,144)], id.vars = "Diversity")

    ggplot(aes(x=Diversity, y=value),data=germanymelted)+geom_point(size=3)+
    facet_grid(~variable) + theme_bw(base_size=15)+
    stat_smooth(method="lm", colour="black", size=2) +
    xlab("\nSpecies Richness") +
    ylab("Standardized value of function\n")

    #Note that the modified mean value is artificial... so face values should not be compared, but only slopes.
    #In this case doesn't change much the output, I guess because most functions are correlated among them?