Skip to content

Instantly share code, notes, and snippets.

@mbacou
Last active August 8, 2020 22:37
Show Gist options
  • Save mbacou/41c2fa42fa36cef2a19d9291432b8560 to your computer and use it in GitHub Desktop.
Save mbacou/41c2fa42fa36cef2a19d9291432b8560 to your computer and use it in GitHub Desktop.
Repex for OpenCPU failed child process error
#' Repex - download 1200+ raster layers from USGS
#' @import data.table stringr
#' @importFrom tools file_path_sans_ext
#' @export
step1 = function(
x = c("2001-01-01", "2004-12-31"),
dir = tempdir()
) {
if(!dir.exists(dir)) dir.create(dir)
x = as.Date(x)
# Build daily time series
x = seq.Date(x[1], x[2], "day")
# Raster files from USGS server
cat = list(
proj = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0",
file = function(x) sub("-0*", "", format(x, "rain_%Y-%j.bil")),
archive = function(x) sub("-0*", "", format(x, "rain_%Y-%j.tar.gz")),
url = "https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/fews/web/africa/daily/rfe/downloads/daily/"
)
# Build target file names
dt = data.table(
code = "rfe",
date = x,
file = eval(cat[["file"]])(x),
archive = eval(cat[["archive"]])(x),
url = cat$url
)
# Add absolute download paths and file paths
dt[, dest := file.path(dir, tolower(code), archive)
][, file := file.path(dir, tolower(code), file)]
dir = dt[, unique(dirname(dest))]
for(i in dir) if(!dir.exists(i)) dir.create(i, recursive=TRUE)
dt[, status := file.exists(file)]
tmp = dt[!(status)]
# Download from remote
if(nrow(tmp) > 0) {
message("1) Attempting to download " , nrow(tmp), " archives from ", tmp[1, url], "... ")
for(i in seq(1, nrow(tmp), by=50)) {
N = min(i+50-1, nrow(tmp))
cat(i, "to", N, "\n")
res = try(download.file(
tmp[i:N, file.path(url, archive)],
tmp[i:N, file.path(dest)], method="libcurl", quiet=TRUE))
if(class(res)[1]=="try-error") warning(
"Failed to download any file in this batch")
}
} else message("1) All caught up, nothing to download.")
# Tally
dt[, status := file.exists(dest)]
if(dt[!(status), .N] > 0) warning(
"Failed to download ", dt[!(status), .N], " rasters")
if(dt[(status), .N]==0) warning(
"No layer was downloaded")
# Deflate all archives
message("2) Deflating ", dt[(status), uniqueN(dest)], " archives into ",
dt[(status), paste(unique(dirname(file)), collapse="\n")])
method = if(!is.na(Sys.getenv("TAR"))) Sys.getenv("TAR") else "internal"
for(f in dt[file.exists(dest) & str_sub(dest, -7)==".tar.gz", unique(dest)])
untar(f, exdir=dirname(f), extras="--skip-old-files", tar=method)
# Warn if any day over the period is missing from the catalog
dt[, status := file.exists(file)]
failed = setdiff(dt[(status), date], x)
if(length(failed) > 0) warning(
"There are ", failed, " missing ", dt[1, time],
" data layers over the requested period. Please investigate.")
dt[, `:=`(
# Make unique layer names
layer = make.names(file_path_sans_ext(basename(file)), unique=TRUE),
# Clean up
archive = NULL,
dest = NULL,
url = NULL
)]
return(dt)
}
#' Repex: spatial point extraction
#' @inheritDotParams step1
#' @import parallel data.table
#' @importFrom raster crs crs<- extract stack removeTmpFiles rasterOptions
#' @examples
#' dir <- tempdir()
#' dt <- step2(dir=dir)
#'
#' @export
step2 = function(
catalog = NULL,
pts = c(X=-0.74, Y=9.37),
mc.cores = getOption("mc.cores", 3L),
...) {
catalog = if(missing(catalog)) step1(...) else catalog
pts = data.table(id=1, X=pts["X"], Y=pts["Y"])
y = catalog[, seq(min(year(date)), max(year(date)))]
# extract() does not multithread with point queries, so force it with mclapply()
mc.cores = if(mc.cores > 1 && length(y) > 1) mc.cores else 1L
tmp = mclapply(y,
mc.cores=mc.cores, mc.silent=FALSE, mc.allow.recursive=FALSE, mc.cleanup=TRUE,
function(x) {
#rasterOptions(timer=FALSE, chunksize=2e+08, datatype=switch(code, rfe="INT1U", "FLT4S"))
r = stack(as.list(catalog[year(date)==x, file]), quick=TRUE, native=TRUE)
names(r) = catalog[year(date)==x, layer]
res = extract(r, pts[, .(X, Y)])
return(res)
})
# Collect all years
dt = lapply(tmp, function(x) {
res = data.table(x)[, id := pts[, id]]
res = melt(res, id.vars="id", variable.name="layer", value.name="value")
res[catalog, on=.(layer), date := date][, layer := NULL]
})
dt = rbindlist(dt)
return(dt)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment