Skip to content

Instantly share code, notes, and snippets.

@raymondben
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save raymondben/b5c4154d13ffc09c43bd to your computer and use it in GitHub Desktop.
Save raymondben/b5c4154d13ffc09c43bd to your computer and use it in GitHub Desktop.
Mapping eucalypt heights across Australia using ALA4R
## see http://untan.gl/articles/2014/10/08_using-ala4r-mapping-eucalypt-height.html
## load the ALA4R package
library(ALA4R)
## check that it's version 1.04 or later: the "guid" attribute on the sites_by_species data was added in that version
if (as.numeric(packageDescription("ALA4R",fields="Version"))<1.04)
stop("reinstall ALA4R to get v1.04 or later")
ala_config(cache_directory=file.path("~","data","ala_cache"))
## some other packages that we'll use
library(stringr)
library(plyr)
library(ggplot2)
## define the text searches (regular expressions) that we use to extract height information from a species profile
htre=data.frame(re=c('(growing to|grows to|heights of up to)[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)'),col_index=c(3),type=c("maximum"),stringsAsFactors=FALSE)
htre=rbind(htre,list('(tree|shrub)[[:punct:][:space:]]+(up to|to)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)',4,"maximum"))
htre=rbind(htre,list('(tree|trees|shrub|shrubs|grows|grow|growing)[[:space:]]+to[[:space:]]+(roughly|about|around|over)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)',4,"maximum")) ## text like "tree to about 15 metres, grows to about 10m"
htre=rbind(htre,list('(tree|trees|shrub|shrubs)[[:space:]]+(about|around|roughly|over)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+(high|tall|in height)',4,"maximum")) ## text like "tree about 4 m high"
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|shrub|tree)[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+(high|tall|in height)',3,"maximum"))
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|shrub|tree)[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+or more (high|tall|in height)',3,"maximum")) ## e.g. "reaching 80m or more in height"
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|growing to|grows to)[[:space:]]+a[[:space:]]+height[[:space:]]+of[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)',3,"maximum"))
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|growing to|grows to)[[:space:]]+a[[:space:]]+maximum[[:space:]]+height[[:space:]]+of[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)',3,"maximum"))
htre=rbind(htre,list('(to|may reach|can reach|up to|reaching|growing to|shrub|shrubs|tree|trees|typically|generally|usually|commonly|about)[[:space:][:punct:]]+[\\.[:digit:]]+[[:space:]]*[–-][[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+(high|tall|in height)',3,"maximum"))
htre=rbind(htre,list('(to|may reach|can reach|reaches|up to|reaching|growing to|shrub|shrubs|tree|trees)[[:space:][:punct:]]+(typically|generally|usually|commonly|about|of about)?[[:space:]]*[\\.[:digit:]]+[[:space:]]*[–-][[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)[[:space:]]+(high|tall|in height)',4,"maximum")) ## e.g. "shrub, typically 2-3 metres high", "tree of about 6-9 metres in height", "reaches about 5-7 metres in height"
htre=rbind(htre,list('(height|shrub|tree|shrubs|trees)[[:space:][:punct:]]+(can range|ranging|mostly seen)[[:space:]]+between[[:space:]]*[\\.[:digit:]]+[[:space:]]*(–|and|-)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)',5,"maximum")) ## "height can range between 2 and 25 metres", "tree ranging between 10 and 20 metres"
htre=rbind(htre,list('(grows to|grow to|growing to|grow)[[:space:]]+between[[:space:]]*[\\.[:digit:]]+[[:space:]]*[(m|metres)]?[[:space:]]*(–|and|-)[[:space:]]*([\\.[:digit:]]+)[[:space:]]*(m|metres)',4,"maximum"))
htre=rbind(htre,list('(occasionally reaching)[[:space:]]+([\\.[:digit:]]+)[[:space:]]*(m|metres)',3,"exceptional maximum")) ## e.g. "occasionally reaching 50 m"
## a function to apply those regular expressions
extract_heights=function(x){
x=x$simpleProperties$value
x=str_replace_all(x,"\\([[:digit:][:space:]–-]+(in|inches|ft|feet)\\)","") ## some descriptions give heights in metres and then repeat in imperial units - remove these
out=data.frame()
processed_x=x
for (k in 1:nrow(htre)) { ## iterate through each regular expression in turn
this_match=str_match_all(processed_x,ignore.case(htre[k,]$re))
this_match=ldply(this_match,function(z)if (is.null(nrow(z))) NULL else data.frame(type=htre[k,]$type,matched=z[,1],value=as.numeric(z[,htre[k,]$col_index])))
if (nrow(this_match)>0) {
out=rbind(out,this_match)
}
processed_x=str_replace_all(processed_x,ignore.case(htre[k,]$re),"") ## remove matches as we go, so that we don't match multiple times on the same text
}
out
}
## download our sites-by-species matrix, using a 0.5-degree grid cell size
ss=sites_by_species("genus:Eucalyptus",wkt="POLYGON((110 -45,155 -45,155 -10,110 -10,110 -45))",gridsize=0.5)
## convert coordinates to cell centres
ss$longitude=ss$longitude+0.25
ss$latitude=ss$latitude+0.25
all_guids=attr(ss,"guid") ## the identifier of each eucalypt taxon in the ss dataframe
guid_heights=rep(NA,length(all_guids))
for (k in 1:length(all_guids)) {
## iterate through each identifier and extract the mean tree height
x=species_info(guid=all_guids[k])
temp=extract_heights(x)
guid_heights[k]=mean(temp$value[temp$type=="maximum"])
}
## now, for each column in our sites-by-species matrix, calculate the average height of the species present, weighted by their prevalences
cellheight=apply(ss[,3:ncol(ss)],1,function(z)sum(z*guid_heights,na.rm=TRUE)/sum(z,na.rm=TRUE))
## there are a few cells that lie over the ocean (errors in record location) --- almost all of these have only a single species, so we'll remove any cell with only one species (this also removes some cells over land)
celln=rowSums(ss[,3:ncol(ss)]>0)
cellheight[celln<2]=NA
## plot the result
(ggplot(data.frame(lon=ss$longitude,lat=ss$latitude,height=cellheight),aes(x=lon,y=lat))+geom_tile(mapping=aes(fill=height))+scale_fill_gradientn(limit=c(0,50),na.value="transparent",colours=c("#709959","#A3C77D","#D6ED9A","#F0E48D","#E6BA85","#C9957F","#D6AD94","#F5E1C4","#FAF1E3")))
## Part 2: exploring the patterns
## pick some environmental layers. See ala_fields("layers") for a full list
env_layers=c("Precipitation - annual","Radiation - annual mean (Bio20)","Elevation ","Temperature - annual max mean","Wind speed - annual mean 9am or 3pm")
working_data=na.omit(xh)
## get the value of each of those environmental layers at each of our sample points
ep=intersect_points(working_data[,c("lat","lon")],env_layers)
working_data=na.omit(cbind(working_data,ep))
## fit a random forest regression model
library(randomForest)
fit=randomForest(height~precipitationAnnual+radiationAnnualMeanBio20+elevation+temperatureAnnualMaxMean+windSpeedAnnualMean9amOr3pm,data=working_data,importance=TRUE)
## plot the partial effects
imp=importance(fit)
op=par(mfrow=c(3,2))
for (i in sort(imp[,1],index.return=TRUE,decreasing=TRUE)$ix) {
thisvar=rownames(imp)[i]
partialPlot(fit,working_data,names(working_data)[names(working_data)==thisvar],xlab=sprintf("%s (importance %.1f)",thisvar,imp[i,1]),main="")
}
par(op)
## use the model to predict across Australia - first construct a lon/lat grid
grid_data=expand.grid(lon=seq(110,155,by=0.2),lat=seq(-45,-10,by=0.1))
grid_data=cbind(grid_data,intersect_points(grid_data[,c("lat","lon")],env_layers)) ## look up environmental data
grid_data$height=predict(fit,newdata=grid_data) ## use model to predict height
(ggplot(grid_data,aes(x=lon,y=lat))+geom_tile(mapping=aes(fill=height))+scale_fill_gradientn(limit=c(0,50),na.value="transparent",colours=c("#709959","#A3C77D","#D6ED9A","#F0E48D","#E6BA85","#C9957F","#D6AD94","#F5E1C4","#FAF1E3")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment