Extract background points from hypervolumes and calculate AUC and the Boyce index
##%######################################################%##
#                                                          #
####           Extracts probability estimates           ####
####         from calibration/validation points         ####
####          for n-dimensional hypervolumes,           ####
####          and compares these to background          ####
####         values from the same hypervolume.          ####
####         Values are then rescaled to {0, 1}         ####
####         based on a 10% omission rate, with         ####
####          upper values capped at the 90th           ####
####         percentile. AUC and Boyce indices          ####
####        are then calculated on the rescaled         ####
####         data. Stuart C Brown - 2021-10-18          ####
#                                                          #
##%######################################################%##
# function for background probability
## essentially the same as hypervolume::hypervolume_estimate_probability(),
## but removes rows/weights where distances are 0. This is necessary because
## each randomly sampled background point has a distance of 0 to itself,
## which becomes Inf when weighted.
## need to provide n.points and a random.seed for sampling background points
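## A quick illustration of the problem (made-up values, kept commented out so
## the script's behaviour is unchanged): a zero distance produces an infinite
## inverse-distance weight, which turns the weighted mean into NaN.
# c(0, 1, 2)^-1                                     # -> Inf 1.0 0.5
# weighted.mean(c(0.9, 0.5, 0.1), c(0, 1, 2)^-1)    # -> NaN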
background_probability <- function(hv, n.points = 10000, weight.exponent = -1,
                                   random.seed = 65, set.edges.zero = TRUE,
                                   edges.zero.distance.factor = 1, verbose = TRUE) {
  require(pbapply)
  require(pdist)
  np <- nrow(hv@RandomPoints)
  dimhv <- ncol(hv@RandomPoints)
  numpointstokeep_hv <- floor(min(c(n.points, np)))
  if (numpointstokeep_hv != n.points && verbose) {
    warning(sprintf("There were fewer than %s background points available. Using %s points. See hv@RandomPoints.",
                    n.points, np),
            call. = FALSE, immediate. = TRUE, noBreaks. = TRUE)
  }
  ## set random seed for reproducibility
  ## only an issue if n.points is less than the number of random points in the hv
  {set.seed(random.seed); index_ss <- sample(1:np, numpointstokeep_hv, replace = FALSE)}
  hv_points_ss <- hv@RandomPoints[index_ss, , drop = FALSE]
  prob_ss <- hv@ValueAtRandomPoints[index_ss]
  ## cutoff distance for the optional edge check: the typical
  ## nearest-neighbour spacing implied by the point density
  point_density <- nrow(hv_points_ss)/hv@Volume
  cutoff_dist <- point_density^(-1/dimhv)
  if (verbose) {
    cat(sprintf("Retaining %d/%d hypervolume random points as background points.\n",
                nrow(hv_points_ss), np))
  }
  pbapply::pboptions(type = if (verbose) "timer" else "none")
  points <- as.matrix(hv_points_ss)
  probabilities <- pbsapply(1:nrow(points), function(i) {
    distances <- pdist::pdist(points[i, , drop = FALSE], hv_points_ss)@dist
    # get indices for distance == 0 (at minimum, the point's self-distance)
    dist0 <- which(distances == 0)
    weights <- distances^weight.exponent
    # exclude dist == 0 from the weighted mean. Otherwise weights are Inf.
    keep <- if (length(dist0)) -dist0 else seq_along(distances)
    result <- sum(prob_ss[keep] * weights[keep])/sum(weights[keep])
    if (set.edges.zero == TRUE) {
      # self-distances are excluded here too; otherwise min(distances)
      # is always 0 and the edge check could never trigger
      if (min(distances[keep]) > cutoff_dist * edges.zero.distance.factor) {
        result <- 0
      }
    }
    return(result)
  }, simplify = TRUE, USE.NAMES = FALSE)
  return(probabilities)
}
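## Usage sketch (hypothetical, commented out; assumes `hv` is an existing
## Hypervolume object, e.g. from hypervolume::hypervolume_gaussian()):
# bg <- background_probability(hv, n.points = 5000, random.seed = 42)
# summary(bg)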
TEST <- FALSE
if (TEST) {
library(data.table)
library(hypervolume)
library(scales)
library(raster)
library(rnaturalearth)
## enmSdm must be installed from GitHub - see below
if (!require(enmSdm)) {
  library(remotes)
  Sys.setenv(R_REMOTES_NO_ERRORS_FROM_WARNINGS = "true")
  # install omnibus as well (used by enmSdm)
  install_github("adamlilith/omnibus")
  install_github("adamlilith/enmSdm", dependencies = TRUE)
  library(enmSdm)
}
# load in lat/lon data
data("quercus")
data_alba <- subset(quercus, Species == "Quercus alba")[, c("Longitude","Latitude")]
# get worldclim data from internet
climatelayers <- getData("worldclim", var = "bio", res = 10)
# z-transform climate layers to make axes comparable
climatelayers_ss <- climatelayers[[c(1,4,12,15)]]
climatelayers_ss <- scale(climatelayers_ss, center = TRUE, scale = TRUE)
climatelayers_ss_cropped <- aggregate(crop(climatelayers_ss, extent(-150,-50,15,60)), 3)
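## optional sanity check (commented out): after z-transformation each layer
## should have a cell mean of ~0 and a cell sd of ~1
# round(cellStats(climatelayers_ss, "mean"), 2)
# round(cellStats(climatelayers_ss, "sd"), 2)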
# extract transformed climate values
climate_alba <- extract(climatelayers_ss_cropped, data_alba)
{set.seed(54); valIDX <- sort(sample(1:nrow(climate_alba), 0.1 * nrow(climate_alba), replace = FALSE))}
# calibration set
climate_alba_cal <- climate_alba[-valIDX, ]
# validation set
climate_alba_val <- climate_alba[valIDX, ]
# compute Gaussian hypervolume
hv_alba <- hypervolume_gaussian(climate_alba_cal, name = "alba", samples.per.point = 10)
hv_alba
plot(hv_alba,
plot.function.additional = function(i, j) {
points(x = climate_alba_val[, i], y = climate_alba_val[, j],
col = "blue2", pch = 16)},
show.legend = FALSE)
# % of val points in hv
round(sum(hypervolume_inclusion_test(hv_alba, points = climate_alba_val, verbose = FALSE,
fast.or.accurate = "accurate"))/nrow(climate_alba_val), 2)*100
# probabilities
cal_hs <- hypervolume_estimate_probability(hv_alba, climate_alba_cal) # calibration points
val_hs <- hypervolume_estimate_probability(hv_alba, climate_alba_val) # validation points
# extract values from random points (equivalent to background points)
## see ?`Hypervolume-class` for details
## the smaller of n.points (10,000) or the total number of random points is sampled
bg_scores <- background_probability(hv_alba, n.points = 10000, random.seed = 20211018,
verbose = TRUE)
{par(mfrow = c(2,2))
hist(cal_hs, breaks = 20)
abline(v = quantile(cal_hs, probs = c(0.10, 0.90)), col = "red", lty = 2)
hist(val_hs, breaks = 20)
abline(v = quantile(cal_hs, probs = c(0.10, 0.90)), col = "red", lty = 2)
hist(bg_scores, breaks = 20)
abline(v = quantile(cal_hs, probs = c(0.10, 0.90)), col = "red", lty = 2)
text(mean(cal_hs), 450, "red lines show\np10 and p90\nof cal_hs")
par(mfrow = c(1,1))}
# Apply p10 threshold to exclude 10% of training presences
## p10 is defined from calibration data
p10 <- quantile(cal_hs, 0.10)
p90 <- quantile(cal_hs, 0.90)
# Threshold. Anything <= p10 becomes NA, anything > p90 becomes p90
## thresholded scores are then rescaled to {0, 1}
cal_hs[cal_hs <= p10] <- NA; cal_hs[cal_hs > p90] <- p90; cal_hs <- rescale(cal_hs, from = c(p10, p90), to = c(0, 1))
val_hs[val_hs <= p10] <- NA; val_hs[val_hs > p90] <- p90; val_hs <- rescale(val_hs, from = c(p10, p90), to = c(0, 1))
bg_scores[bg_scores <= p10] <- NA; bg_scores[bg_scores > p90] <- p90; bg_scores <- rescale(bg_scores, from = c(p10, p90), to = c(0, 1))
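## Illustration of the rescaling with made-up numbers (assuming p10 = 0.2 and
## p90 = 0.8): values just above p10 map to ~0, values capped at p90 map to 1,
## and values at or below p10 were already set to NA above
# scales::rescale(c(0.2, 0.5, 0.8), from = c(0.2, 0.8), to = c(0, 1))  # -> 0.0 0.5 1.0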
{par(mfrow = c(2,2))
hist(cal_hs, breaks = 20)
hist(val_hs, breaks = 20)
hist(bg_scores, breaks = 20)
par(mfrow = c(1,1))}
# AUC
aucWeighted(cal_hs, bg_scores, na.rm = TRUE)
aucWeighted(val_hs, bg_scores, na.rm = TRUE)
# Boyce
contBoyce(cal_hs, bg_scores, na.rm = TRUE, binWidth = 0.1)
contBoyce(val_hs, bg_scores, na.rm = TRUE, binWidth = 0.1)
contBoyce2x(cal_hs, bg_scores, na.rm = TRUE)
contBoyce2x(val_hs, bg_scores, na.rm = TRUE)
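## interpretation note: the continuous Boyce index ranges from -1 to 1;
## values near 1 indicate predictions consistent with the evaluation presences,
## ~0 is no better than random, and negative values indicate counter
## predictions (Hirzel et al. 2006)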
# Spatial projection
alba_map <- hypervolume_project(hv_alba, climatelayers_ss_cropped, set.edges.zero = TRUE)
plot(alba_map, col = hcl.colors(100, "Green-Yellow", rev = TRUE),
legend = TRUE, main = "Quercus alba [raw scale]",
sub = "Calibration points in blue; validation points in red",
addfun = function() {
lines(ne_coastline(110, "sp"))
points(data_alba[-valIDX, ], cex = 0.2, pch = 16, col = "#0000F068")
points(data_alba[valIDX, ], cex = 0.2, pch = 16, col = "#FF30305F")
})
# Thresholded spatial projection
## p10 is very conservative
alba_map_thr <- alba_map
alba_map_thr[alba_map_thr <= p10] <- NA
alba_map_thr[alba_map_thr > p90] <- p90
values(alba_map_thr) <- rescale(values(alba_map_thr), from = c(p10, p90), to = c(0, 1))
plot(alba_map_thr, col = hcl.colors(100, "Green-Yellow", rev = TRUE),
legend = TRUE, main = "Quercus alba [rescaled]",
sub = "Calibration points in blue; validation points in red",
zlim = c(0, 1),
addfun = function() {
lines(ne_coastline(110, "sp"))
points(data_alba[-valIDX, ], cex = 0.2, pch = 16, col = "#0000F068")
points(data_alba[valIDX, ], cex = 0.2, pch = 16, col = "#FF30305F")
})
# Thresholded spatial projection
## use minimum training presence
alba_map_mtp <- alba_map
mtp <- min(extract(alba_map, data_alba[-valIDX, ]), na.rm = TRUE)
maxtp <- max(values(alba_map_mtp), na.rm = TRUE)
alba_map_mtp[alba_map_mtp <= mtp] <- NA
alba_map_mtp <- raster::stretch(alba_map_mtp, minv = 0, maxv = 1, smin = mtp, smax = maxtp)
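## raster::stretch() linearly maps values from [smin, smax] onto [minv, maxv],
## so cells just above the minimum training presence rescale to ~0 (cells at
## or below mtp were set to NA above) and the maximum rescales to 1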
plot(alba_map_mtp, col = hcl.colors(100, "Green-Yellow", rev = TRUE),
legend = TRUE, main = "Quercus alba [rescaled mtp]",
sub = "Calibration points in blue; validation points in red",
zlim = c(0, 1),
addfun = function() {
lines(ne_coastline(110, "sp"))
points(data_alba[-valIDX, ], cex = 0.2, pch = 16, col = "#0000F068")
points(data_alba[valIDX, ], cex = 0.2, pch = 16, col = "#FF30305F")
})
# Thresholded spatial projection
## use 95th percentile of all values > 0 as upper bound. 0 as lower.
alba_map_q95 <- alba_map
q95 <- quantile(alba_map_q95[alba_map_q95 > 0], 0.95, na.rm = TRUE)
alba_map_q95 <- raster::stretch(alba_map_q95, minv = 0, maxv = 1, smin = 0, smax = q95)
plot(alba_map_q95, col = hcl.colors(100, "Green-Yellow", rev = TRUE),
legend = TRUE, main = "Quercus alba [rescaled q95]",
sub = "Calibration points in blue; validation points in red",
zlim = c(0, 1),
addfun = function() {
lines(ne_coastline(110, "sp"))
points(data_alba[-valIDX, ], cex = 0.2, pch = 16, col = "#0000F068")
points(data_alba[valIDX, ], cex = 0.2, pch = 16, col = "#FF30305F")
})
{par(mfrow = c(2,2))
hist(extract(alba_map_thr, data_alba), breaks = 20, xlim = c(0, 1), main = "p10/p90")
hist(extract(alba_map_mtp, data_alba), breaks = 20, xlim = c(0, 1), main = "mtp")
hist(extract(alba_map_q95, data_alba), breaks = 20, xlim = c(0, 1), main = "q95")
par(mfrow = c(1,1))}
}