Created
August 3, 2020 19:33
Star
You must be signed in to star a gist
Attempts to build a decently-usable, but not quite fetchNASIS-level, gNATSGO SoilProfileCollection for stress testing the S4 methods
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(aqp) | |
library(sf) | |
library(soilDB) | |
library(data.table) | |
library(microbenchmark) | |
# path to gNATSGO Geodatabase | |
dsn <- "~/geodata/gNATSGO/2020/gNATSGO_CONUS.gdb" | |
# This is one of the main bottlenecks (not the only one) to reading out of GDB | |
# chorizon is a pretty large amount of data. Let's save it as a flat file (2.1GB uncompressd) | |
# | |
# in principle, I think all tables could be pre-read out of the GDB and then joined to a | |
# SPC "skeleton" based off of the chorizon data. | |
# | |
# system.time(h <- sf::read_sf(dsn = dsn, layer = "chorizon", | |
# stringsAsFactors = FALSE, as_tibble = FALSE)) | |
# # user system elapsed | |
# # 216.374 5.678 222.533 | |
# | |
# data.table::fwrite(h, file="~/geodata/gNATSGO/2020/horizons.csv") | |
# read the data in from flat file (TODO: consider SQLite storage of all tables and select * from chorizon) | |
starttime <- Sys.time() | |
gnatsgo <- data.table::fread(file = "~/geodata/gNATSGO/2020/horizons.csv") | |
print(Sys.time() - starttime) | |
# Time difference of 9.053402 secs | |
system.time(depths(gnatsgo) <- cokey ~ hzdept_r + hzdepb_r) | |
# user system elapsed | |
# 78.714 1.057 77.307 | |
system.time(l <- get_legend_from_GDB(dsn, "areasymbol != ''")) | |
# user system elapsed | |
# 0.196 0.000 0.195 | |
system.time(m <- get_mapunit_from_GDB(dsn, "muname != ''")) | |
# user system elapsed | |
# 6.555 0.077 6.926 | |
system.time(co <- get_component_from_GDB(dsn = dsn, "compname != ''")) | |
# user system elapsed | |
# 35.453 0.384 36.051 | |
# need chunking | |
# cgd <- soilDB:::.get_cogeomordesc_from_GDB(dsn, co = co) | |
# pmg <- soilDB:::.get_copmgrp_from_GDB(gdb.path, co = co) | |
system.time(site(gnatsgo) <- co) | |
# user system elapsed | |
# 4.334 0.004 3.248 | |
system.time(site(gnatsgo) <- m) | |
# user system elapsed | |
# 3.247 0.000 2.700 | |
system.time(site(gnatsgo) <- l) | |
# user system elapsed | |
# 2.838 0.000 2.499 | |
microbenchmark::microbenchmark({ a <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1]}, | |
{ b <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1:2]}, | |
{ c <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1]}, | |
{ d <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1:2]}, | |
times = 10) | |
# one or two [random] pedons; one or two horizons [first or first and second] | |
# # Unit: milliseconds | |
# expr min lq mean median uq max neval cld | |
# { a <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1] } 116.9621 126.7610 131.7065 135.5488 136.5518 139.0037 10 a | |
# { b <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1:2] } 121.5565 134.4841 135.2277 137.6504 139.2550 141.0407 10 a | |
# { c <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1] } 117.7989 137.6253 140.2133 138.9760 151.9303 156.8037 10 ab | |
# { d <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1:2] } 132.5007 135.7209 149.1425 154.1004 155.4409 161.5209 10 b | |
# open-ended j indexing (generally a pretty slow operation) | |
microbenchmark::microbenchmark({foo <- gnatsgo[,1] | |
foo <- NULL}, | |
{foo <- gnatsgo[,2:3] | |
foo <- NULL}, | |
times = 5) | |
# first horizon or second and third horizon, all profiles | |
# Unit: seconds | |
# expr min lq mean median uq max neval cld | |
# { foo <- gnatsgo[, 1] foo <- NULL } 14.97619 15.46312 15.78757 16.09652 16.13384 16.26816 5 a | |
# { foo <- gnatsgo[, 2:3] foo <- NULL } 18.65325 18.68035 18.85315 18.83340 19.03189 19.06688 5 b |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment