-
-
Save raymondben/a16ffd7fd31e69bd561a to your computer and use it in GitHub Desktop.
Creating 3D prints of species distributions using GBIF data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Creating 3D prints of species distributions using GBIF data | |
## Ben Raymond | |
## Last-Modified: <2015-02-27 14:12:25> | |
## change these paths to suit your needs | |
setwd("~/your/path/") | |
cache_directory="~/your/data/cache/path/" | |
## required packages | |
library(rgbif) | |
library(plyr) | |
library(R.cache) | |
library(ggplot2) | |
library(randomForest) | |
library(raster) | |
library(r2stl) | |
## We use the `R.cache` package to cache our web calls to the GBIF API. This just means that, having run a certain query once, we can re-run it without needing an internet connection. Specify the cache directory and create a cached version of the `occ_search` function: | |
setCacheRootPath(cache_directory) | |
cached_occ_search=addMemoization(occ_search) | |
## Specify the taxa we are interested in | |
parentKey=5298 ## Deer (Cervidae) | |
## although we are specifically interested in white-tailed deer Odocoileus virginianus and mule deer Odocoileus hemionus, we download all deer taxa so that we can use the locations of other species as absences in our species distribution modelling | |
this=cached_occ_search(taxonKey=parentKey,continent="north_america",limit=10000)$data | |
## quick check | |
md=map_data("world") | |
p=ggplot(data=subset(this,grepl("Odocoileus (h|v)",name)),aes(x=decimalLongitude,y=decimalLatitude))+geom_point(aes(color=name)) | |
p=p+geom_path(aes(x=long,y=lat,group=group),fill=NA,colour="black",data=md)+coord_map()+xlim(c(-175,-50))+ylim(c(0,65)) | |
p | |
## Download the bioclimatic and altitude layers from http://www.worldclim.org/current | |
## http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/bio_5m_bil.zip | |
## and | |
## http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/alt_5m_bil.zip | |
## | |
## These layers are: | |
## BIO1 = Annual Mean Temperature | |
## BIO2 = Mean Diurnal Range (Mean of monthly (max temp - min temp)) | |
## BIO3 = Isothermality (BIO2/BIO7) (* 100) | |
## BIO4 = Temperature Seasonality (standard deviation *100) | |
## BIO5 = Max Temperature of Warmest Month | |
## BIO6 = Min Temperature of Coldest Month | |
## BIO7 = Temperature Annual Range (BIO5-BIO6) | |
## BIO8 = Mean Temperature of Wettest Quarter | |
## BIO9 = Mean Temperature of Driest Quarter | |
## BIO10 = Mean Temperature of Warmest Quarter | |
## BIO11 = Mean Temperature of Coldest Quarter | |
## BIO12 = Annual Precipitation | |
## BIO13 = Precipitation of Wettest Month | |
## BIO14 = Precipitation of Driest Month | |
## BIO15 = Precipitation Seasonality (Coefficient of Variation) | |
## BIO16 = Precipitation of Wettest Quarter | |
## BIO17 = Precipitation of Driest Quarter | |
## BIO18 = Precipitation of Warmest Quarter | |
## BIO19 = Precipitation of Coldest Quarter | |
## We somewhat-arbitraily select seven of these layers to use in the modelling | |
## adjust this path to wherever you put the data files | |
envfiles=file.path("~/data/worldclim",c("alt.bil","bio1.bil","bio5.bil","bio6.bil","bio12.bil","bio16.bil","bio17.bil")) | |
envstack=stack(envfiles) | |
## extract environmental values at data locations | |
trainenv=extract(envstack,this[,c("decimalLongitude","decimalLatitude")]) | |
trainenv=as.data.frame(trainenv) | |
## fit random forest classification models | |
trainenv$ov=as.factor(grepl("Odocoileus virginianus",this$name)) | |
trainenv$oh=as.factor(grepl("Odocoileus hemionus",this$name)) | |
fit_oh=randomForest(oh~alt+bio1+bio5+bio6+bio12+bio16+bio17,data=na.omit(trainenv),importance=TRUE) | |
fit_ov=randomForest(ov~alt+bio1+bio5+bio6+bio12+bio16+bio17,data=na.omit(trainenv),importance=TRUE) | |
## create grid for predictions | |
gridlon=seq(-175,-50,by=0.5) | |
gridlat=seq(10,80,by=0.5) | |
grid_data=expand.grid(gridlon,gridlat) | |
names(grid_data)=c("lon","lat") | |
## extract environmental covariates across grid | |
grid_data=cbind(grid_data,extract(envstack,grid_data)) | |
## use models to predict over this region | |
grid_data$oh=predict(fit_oh,newdata=grid_data,type="prob",norm.votes=TRUE)[,2] | |
grid_data$ov=predict(fit_ov,newdata=grid_data,type="prob",norm.votes=TRUE)[,2] | |
## some manual adjustments of the results, mainly to remove some predicted areas that have resulted from extrapolations beyond our training data range | |
tempidx=grid_data$lon>-85 & grid_data$lat>30## & !is.na(grid_data$oh) | |
grid_data$oh[tempidx]=grid_data$oh[tempidx]*(1-(grid_data$lon[tempidx] + 85)/5) | |
grid_data$oh[grid_data$oh<0]=0 | |
grid_data$oh[grid_data$lat>65 & !is.na(grid_data$oh)]=0 | |
grid_data$oh[grid_data$lon< -140 & grid_data$lat<30 & !is.na(grid_data$oh)]=0 | |
grid_data$oh[grid_data$lon>-79.5 & grid_data$lon< -64.6737 & grid_data$lat>13.5996 & grid_data$lat<28 & !is.na(grid_data$oh)]=0 | |
grid_data$oh[grid_data$lon>-63.6737 & grid_data$lon< -59.8794 & grid_data$lat>12.0571 & grid_data$lat<18.7044 & !is.na(grid_data$oh)]=0 | |
grid_data$oh[grid_data$lon>-85.0864 & grid_data$lon< -78.7038 & grid_data$lat>20.2836 & grid_data$lat<24.3601 & !is.na(grid_data$oh)]=0 | |
grid_data$ov[grid_data$lon< -140 & grid_data$lat<30 & !is.na(grid_data$ov)]=0 | |
grid_data$ov[grid_data$lon>-79.5 & grid_data$lon< -64.6737 & grid_data$lat>13.5996 & grid_data$lat<28 & !is.na(grid_data$ov)]=0 | |
grid_data$ov[grid_data$lon>-63.6737 & grid_data$lon< -59.8794 & grid_data$lat>12.0571 & grid_data$lat<18.7044 & !is.na(grid_data$ov)]=0 | |
grid_data$ov[grid_data$lon>-85.0864 & grid_data$lon< -78.7038 & grid_data$lat>20.2836 & grid_data$lat<24.3601 & !is.na(grid_data$ov)]=0 | |
## check | |
p=ggplot(data=grid_data,aes(x=lon,y=lat))+geom_tile(aes(fill=oh)) | |
p=p+geom_path(aes(x=long,y=lat,group=group),fill=NA,colour="black",data=md)+xlim(c(-175,-50))+ylim(c(0,65))##+coord_map()+ | |
p=p+scale_fill_gradientn(limit=c(0,1),na.value="transparent",colours=c("#5E4EA1","#3287BD","#66C1A5","#ABDDA4","#E5F498","#FFFFBF","#FEDF8B","#FDAD60","#F36C43","#D43E4E","#9E0041")) | |
p | |
## create STL files for 3D printing | |
offset_height=0.1 ## step height of landmasses above baseplate, and of zero-probability above landmasses | |
## remember that the distributions themselves range from 0-1, so 0.1 is 1/10th of the full height of that | |
temp=matrix(grid_data$ov,nrow=length(gridlon))+offset_height | |
temp[is.na(temp)]=0 | |
temp=temp+offset_height | |
r2stl(gridlon,gridlat,temp,filename="Odocoileus_virginianus.stl") | |
temp=matrix(grid_data$oh,nrow=length(gridlon))+offset_height | |
temp[is.na(temp)]=0 | |
temp=temp+offset_height | |
r2stl(gridlon,gridlat,temp,filename="Odocoileus_hemionus.stl") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment