Skip to content

Instantly share code, notes, and snippets.

@AugustT
Created June 14, 2016 09:45
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AugustT/aeddbe54b5f70efadf7087562460e94f to your computer and use it in GitHub Desktop.
Save AugustT/aeddbe54b5f70efadf7087562460e94f to your computer and use it in GitHub Desktop.
install.packages('zoon')
library(zoon)
vignette('Building_a_module')
#############
## Simples ##
#############
# I want to share my data via a ZOON module
# I uploaded it to figshare to make it accessible
# Here is the URL
URL <- "https://ndownloader.figshare.com/files/2519918"
# Here is the data
out <- read.csv(URL)
# Let's have a look
View(out)
# We don't need the date column
out <- out[, c("longitude", "latitude")]
# Our data is missing some of the required columns
out$value <- 1 # all our data are presences (0s would mean absence)
out$type <- 'presence'
out$fold <- 1 # we don't add any folds
# Now let's have a look
View(out)
# We can now write this as a function
Tom_Lorem_ipsum_UK <- function(){
# Get data
URL <- "https://ndownloader.figshare.com/files/2519918"
out <- read.csv(URL)
out <- out[, c("longitude", "latitude")]
# Add in the columns we dont have
out$value <- 1 # all our data are presences
out$type <- 'presence'
out$fold <- 1 # we wont add any folds
return(out)
}
# We can test it in a workflow to make sure it works as
# we would expect
library(zoon)
workl1 <- workflow(occurrence = Tom_Lorem_ipsum_UK,
covariate = UKBioclim,
process = OneHundredBackground,
model = LogisticRegression,
output = InteractiveMap)
# At the moment we have a function, to convert this into
# a module that we can share with others we use the
# BuildModule function
BuildModule(object = Tom_Lorem_ipsum_UK,
type = 'occurrence',
title = 'A dataset of Lorem ipsum occurrences',
description = paste0('The module retrieves a dataset of',
'Lorem ipsum records from figshare. This dataset contains',
'precence only data and was collected between 1990 and',
'2000 by members of to Lorem ipsum appreciation society'),
details = 'This dataset is fake, Lorem ipsum does not exist',
author = 'A.B. Ceidi',
email = 'ABCD@anemail.com',
dataType = 'presence-only')
rm(list = 'Tom_Lorem_ipsum_UK')
# This is how you would use a module that a colleague has sent you
LoadModule(module = 'Tom_Lorem_ipsum_UK.R')
work2 <- workflow(occurrence = Tom_Lorem_ipsum_UK,
covariate = UKAir,
process = Background(n = 50),
model = LogisticRegression,
output = PrintMap)
## SLIDE ##
######################
## More complicated ##
######################
# I want to build a module that creates a random raster
# of a user defined extent and resolution
# Note parameters and defaults
NaiveRandomRaster <- function(extent = c(-10, 10, 45, 65), res = 0.5, seed = NULL){
if(length(extent) != 4 | !inherits(extent, 'numeric')) stop("extent must be a numeric vector of length 4")
if(extent[1] >= extent[2]) stop('in extent - min x must be smaller than max x')
if(extent[3] >= extent[4]) stop('in extent - min y must be smaller than max y')
nrow <- (extent[2] - extent[1])/res
ncol <- (extent[4] - extent[3])/res
if(!is.null(seed)) set.seed(seed)
rasMat <- matrix(data = runif(n = nrow * ncol,
min = 0,
max = 100),
nrow = nrow,
ncol = ncol)
ras <- raster::raster(x = rasMat,
xmn = extent[1],
xmx = extent[2],
ymn = extent[3],
ymx = extent[4],
crs = "+proj=longlat +datum=WGS84")
return(ras)
}
# This time when we build the module we need to define the
# parameters
BuildModule(NaiveRandomRaster,
type = 'covariate',
title = 'Naive Random Raster',
description = 'Creates a random raster layer within the extent given',
author = 'ZOON Developers',
email = 'zoonproject@gmail.com',
paras = list(extent = 'A numeric vector of length 4 giving the coordinates of the rectangular region within which to create the raaster. order: xmin, xmax, ymin, ymax. By default the extent of the UK',
res = 'Numeric giving the resolution of the raster to produce. 1 is one cell per degree, 0.5 means one cell per half degree.',
seed = 'Used with set.seed to set a seed. Default NULL, no seed is used'))
rm(list = 'NaiveRandomRaster')
LoadModule('NaiveRandomRaster.R')
# You can see the metadata above is used in the help file
ModuleHelp('NaiveRandomRaster')
# Here it is in action
work3 <- workflow(occurrence = UKAnophelesPlumbeus,
covariate = list(NaiveRandomRaster(extent = c(-10, 2, 49, 58),
res = 1),
NaiveRandomRaster(extent = c(-10, 2, 49, 58),
res = 0.5),
NaiveRandomRaster(extent = c(-10, 2, 49, 58),
res = 0.1)),
process = Background(n = 50),
model = LogisticRegression,
output = PrintMap)
par(mfrow = c(1,3))
for(i in Output(work3)) plot(i)
## SLIDE ##
##################
## Model module ##
##################
# Model modules havea default argument .df which is a dataframe
# This looks similar to the occurrence dataset but with the
# covariate data added as extra columns
# Here is an example
.df <- Process(work3)[[1]]$df
View(.df)
# Lets write our function
GamGam <- function(.df){ # We need to add in the default arguments
# Specify the packages we need using the function
# GetPackage
zoon::GetPackage("gam")
# Create a data.frame of covariate data
covs <- .df[colnames(.df) %in% attr(.df, 'covCols')]
# do a bit of copy-pasting to define smooth terms for each covariate
f <- sprintf('.df$value ~ s(%s)',
paste(colnames(covs),
collapse = ') + s('))
# Run our gam model
m <- gam::gam(formula = formula(f),
data = covs,
family = binomial)
# Create a ZoonModel object to return.
# this includes our model, predict method
# and the packages we need.
ZoonModel(model = m,
code = {
# create empty vector of predictions
p <- rep(NA, nrow(newdata))
# omit NAs in new data
newdata_clean <- na.omit(newdata)
# get their indices
na_idx <- attr(newdata_clean, 'na.action')
# if there are no NAs then the index should
# include all rows, else it should name the
# rows to ignore
if (is.null(na_idx)){
idx <- 1:nrow(newdata)
} else {
idx <- -na_idx
}
# Use the predict function in gam to predict
# our new values
p[idx] <- gam::predict.gam(model,
newdata_clean,
type = 'response')
return (p)
},
packages = 'gam')
}
##############
## Cite Me! ##
##############
ZoonCitation('NaiveRandomRaster')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment