Created
July 27, 2016 05:13
-
-
Save rmarrotte/fe7cccbaea7d921a6381d8574500621b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#################################### | |
# Run zonal statistics in parallel # | |
#################################### | |
# Clear workspace | |
rm(list=ls()) | |
# Libraries | |
library(raster) | |
library(gstat) | |
library(scales) | |
library(ggplot2) | |
library(snowfall) | |
# Make some fake data | |
xy <- expand.grid(1:500, 1:500) | |
names(xy) <- c("x","y") | |
# define the gstat object (spatial model) | |
g.dummy <- gstat(formula=z~1, locations=~x+y, dummy=T, beta=1, | |
model=vgm(psill=0.025,range=100,model="Exp"), nmax=20) | |
# make a simulation based on the gstat object | |
yy <- predict(g.dummy, newdata=xy, nsim=1) | |
# Scale the values between 500 and 1000 | |
yy$sim1 <- rescale(x = yy$sim1, to = c(500,1000)) | |
# Make integer raster | |
yy$sim1 <- round(yy$sim1,0) | |
# show one realisation | |
gridded(yy) <- ~x+y | |
DEM <- raster(yy) | |
rm(xy,g.dummy,yy) | |
# Plot this data | |
x11() | |
plot(DEM) | |
# I need to collect the zonal statistic at each point, | |
# I am using a 10 unit buffer, I collect the statistic using summary and | |
# add in the standard deviation also | |
# n is the number of random points to collect data for | |
output <- c() | |
for(n in c(10,100,1000,10000)){ | |
# Make some fake points | |
pnts <- sampleRandom(x = DEM, size = n, sp=T) | |
# Turn on timer | |
ptm <- proc.time() | |
# Extract the values | |
vals <- extract(DEM,pnts,buffer=100) | |
# Make a table | |
stats <- data.frame(do.call("rbind",lapply(vals,summary))) | |
stats$std <- unlist(lapply(vals,sd)) | |
# Print it | |
print(head(stats)) | |
# Output | |
output <- rbind(output,c(n,(proc.time() - ptm)[3])) | |
} | |
# The output df | |
output <- data.frame(output) | |
colnames(output)[1] <- c("Number_pnts") | |
# Zonal statistics functions, | |
# rat = raster | |
# pnts = Your points | |
# buff = the buffer length | |
# nthreads = Number of parallel processes | |
# bsize = The number of elements per thread | |
zonal_stats <- function(rat, pnts, buff, | |
nthreads = 4){ | |
# Bunch sizes, how many points per batch | |
bsize <- length(pnts) %/% nthreads | |
# Turn pnts into list of n batches | |
coords <- coordinates(pnts) | |
quotient <- dim(coords)[1]%/%bsize # quotient | |
# Sequences to make batches of points | |
seq1 <- seq(from = 1,to = dim(coords)[1],by = bsize) | |
seq2 <- c(seq(from = bsize,to = dim(coords)[1],by = bsize),dim(coords)[1]) | |
# Make List of batches of points | |
pnt_list <- list() | |
for(i in 1:length(seq1)){ | |
# These are the batches of bsize points | |
pnt_list[[length(pnt_list)+1]] <- SpatialPoints(coords = coords[seq1[i]:seq2[i],]) | |
} | |
rm(i,seq1,seq2,quotient) | |
# Inside function to get statistics for 1 single point within a buffer | |
get_pnt_stats <- function(pnts, rat, buff){ | |
# Libraries | |
require(raster) | |
require(sp) | |
# zonal vals | |
pnt_vals <- extract(x = rat, y = pnts, buffer = buff) | |
# Get stats for each | |
pnt_sums <- lapply(X = pnt_vals, FUN = function(vals){ | |
# Count NA's | |
nberNAs <- length(which(is.na(vals))) | |
nbvals <- length(which(!is.na(vals))) | |
# Remove NA's | |
vals <- vals[!is.na(vals)] | |
if(length(vals) > 0){ | |
stats <- summary(vals) | |
stats <- as.numeric(stats)[1:6] | |
std <- sd(vals) | |
stats <- c(stats,std,nbvals,nberNAs) | |
}else{ | |
stats <- c(rep(NA,7),nbvals,nberNAs) | |
} | |
names(stats) <- c("Min","Qu1","Median","Mean","Qu3","Max","Std","NbVals","NAs") | |
return(stats) | |
}) | |
# Rbind everything together to make a df | |
pnt_sums <- data.frame(do.call("rbind",pnt_sums)) | |
row.names(pnt_sums) <- row.names(pnts) | |
return(pnt_sums) | |
} | |
# Apply this little function over all the points | |
sfInit(parallel = T, cpus = nthreads) | |
stats_out <- sfLapply(pnt_list, | |
get_pnt_stats, | |
rat = rat, | |
buff = buff) | |
sfStop() | |
# Fix output | |
stats_out <- data.frame(do.call("rbind",stats_out)) | |
# return it | |
return(stats_out) | |
} | |
output_p <- c() | |
for(n in c(10,100,1000,10000)){ | |
# Make some fake points | |
pnts <- sampleRandom(x = DEM, size = n, sp=T) | |
# Turn on timer | |
ptm <- proc.time() | |
# Extract the values | |
stats <- zonal_stats(rat = DEM, | |
pnts = pnts, | |
buff = 100, | |
nthreads = 4) | |
# Print it | |
print(head(stats)) | |
# Output | |
output_p <- rbind(output_p,c(n,(proc.time() - ptm)[3])) | |
} | |
# The output df | |
output_p <- data.frame(output_p) | |
colnames(output_p)[1] <- c("Number_pnts") | |
# Bind both together | |
output_p$Processing <- "Parallel" | |
output$Processing <- "Sequential" | |
output_both <- rbind(output,output_p) | |
# ggplot | |
x11(12,12) | |
ggplot(data = output_both, aes(y = elapsed, x = Number_pnts)) + | |
geom_point(aes(shape=Processing),size = 3) + | |
scale_x_log10() + | |
ylab("Processing time") + | |
xlab("Number of points") + | |
theme_bw() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment