Skip to content

Instantly share code, notes, and snippets.

@mdsumner
Created August 2, 2017 13:36
Show Gist options
  • Save mdsumner/ca460e7a9063dbf8592826e8a17cc419 to your computer and use it in GitHub Desktop.
Save mdsumner/ca460e7a9063dbf8592826e8a17cc419 to your computer and use it in GitHub Desktop.
library(raadtools)
fs <- icefiles()
library(blob)
library(tibble)
read_ice_blob <- function(x) readBin(x, "raw", n = 105212)
system.time({
## 421s
ice_blob <- tibble(blob = new_blob(purrr::map(fs$fullname, read_ice_blob)))
})
#pryr::object_size(ice_blob)
#1.32Gb
ice_blob$year <- as.integer(format(fs$date, "%Y"))
ice_blob$month <- as.integer(format(fs$date, "%m"))
ice_blob$day <- as.integer(format(fs$date, "%d"))
ice_blob$file <- fs$file
library(dplyr)
db <- src_sqlite("ice_db.sqlite3", create = TRUE)
ice_blob <- copy_to(db, ice_blob, temporary = FALSE, indexes = list(c("year", "month", "day")))
read_blob_NSIDC <- function(icyblob, setNA = TRUE, rescale = TRUE) {
ext <- c(-3950000, 3950000, -3950000, 4350000)
prj <- "+proj=stere +lat_0=-90 +lat_ts=-70 +lon_0=0 +k=1 +x_0=0 +y_0=0 +a=6378273 +b=6356889.449 +units=m +no_defs "
dims <- c(332L, 316L)
rtemplate <- raster(extent(ext), nrows = dims[1L], ncols = dims[2L],
crs = prj)
dat <- readBin(unlist(icyblob)[-c(1:300)], "integer", size = 1, n = prod(dims),
endian = "little", signed = FALSE)
r100 <- dat > 250
r0 <- dat < 1
if (rescale) {
dat <- dat/2.5
}
if (setNA) {
dat[r100] <- NA
}
r <- raster(t(matrix(dat, dims[1])), template = rtemplate)
if (!setNA && !rescale) {
rat <- data.frame(ID = 0:255, icecover = c(0:250,
"ArcticMask", "Unused", "Coastlines", "LandMask",
"Missing"), code = 0:255, stringsAsFactors = FALSE)
levels(r) <- rat
r
}
else {
r
}
}
read_blob_NSIDC(ice_blob %>% filter(year == 2015, month == 12, day == 5) %>% collect() %>% pull(blob) %>% unclass()) %>% plot()
read_blob_NSIDC(ice_blob %>% filter(year == 2016, month == 6, day == 5) %>% collect() %>% pull(blob) %>% unclass()) %>% plot()
system.time({
for (i in 1:365) {
dd <- sample(1:28, 1)
read_blob_NSIDC(ice_blob %>% filter(year == 2015, month == 12, day == dd) %>% collect() %>% pull(blob) %>% unclass())
}
})
## raadtools is faster than the iterative DB-collect in the loop
system.time(readice(seq(ISOdate(2015, 1, 1), ISOdate(2015, 12, 31), by = "1 day"), inputfiles = fs))
## but bulk collect wins
system.time({
tab <- ice_blob %>% filter(year == 2015) %>% collect()
br <- stack(lapply(tab$blob, read_blob_NSIDC), quick = TRUE)
})
print(br)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment