Skip to content

Instantly share code, notes, and snippets.

@brownag
Created August 3, 2020 19:33
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save brownag/0fc3cf49835be9f3701a8c12c6d92da5 to your computer and use it in GitHub Desktop.
Attempts to build a decently-usable, but not quite fetchNASIS-level, gNATSGO SoilProfileCollection for stress testing the S4 methods
library(aqp)
library(sf)
library(soilDB)
library(data.table)
library(microbenchmark)
# path to gNATSGO Geodatabase
dsn <- "~/geodata/gNATSGO/2020/gNATSGO_CONUS.gdb"
# This is one of the main bottlenecks (not the only one) to reading out of GDB
# chorizon is a pretty large amount of data. Let's save it as a flat file (2.1GB uncompressd)
#
# in principle, I think all tables could be pre-read out of the GDB and then joined to a
# SPC "skeleton" based off of the chorizon data.
#
# system.time(h <- sf::read_sf(dsn = dsn, layer = "chorizon",
# stringsAsFactors = FALSE, as_tibble = FALSE))
# # user system elapsed
# # 216.374 5.678 222.533
#
# data.table::fwrite(h, file="~/geodata/gNATSGO/2020/horizons.csv")
# read the data in from flat file (TODO: consider SQLite storage of all tables and select * from chorizon)
starttime <- Sys.time()
gnatsgo <- data.table::fread(file = "~/geodata/gNATSGO/2020/horizons.csv")
print(Sys.time() - starttime)
# Time difference of 9.053402 secs
system.time(depths(gnatsgo) <- cokey ~ hzdept_r + hzdepb_r)
# user system elapsed
# 78.714 1.057 77.307
system.time(l <- get_legend_from_GDB(dsn, "areasymbol != ''"))
# user system elapsed
# 0.196 0.000 0.195
system.time(m <- get_mapunit_from_GDB(dsn, "muname != ''"))
# user system elapsed
# 6.555 0.077 6.926
system.time(co <- get_component_from_GDB(dsn = dsn, "compname != ''"))
# user system elapsed
# 35.453 0.384 36.051
# need chunking
# cgd <- soilDB:::.get_cogeomordesc_from_GDB(dsn, co = co)
# pmg <- soilDB:::.get_copmgrp_from_GDB(gdb.path, co = co)
system.time(site(gnatsgo) <- co)
# user system elapsed
# 4.334 0.004 3.248
system.time(site(gnatsgo) <- m)
# user system elapsed
# 3.247 0.000 2.700
system.time(site(gnatsgo) <- l)
# user system elapsed
# 2.838 0.000 2.499
microbenchmark::microbenchmark({ a <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1]},
{ b <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1:2]},
{ c <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1]},
{ d <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1:2]},
times = 10)
# one or two [random] pedons; one or two horizons [first or first and second]
# # Unit: milliseconds
# expr min lq mean median uq max neval cld
# { a <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1] } 116.9621 126.7610 131.7065 135.5488 136.5518 139.0037 10 a
# { b <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1:2] } 121.5565 134.4841 135.2277 137.6504 139.2550 141.0407 10 a
# { c <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1] } 117.7989 137.6253 140.2133 138.9760 151.9303 156.8037 10 ab
# { d <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1:2] } 132.5007 135.7209 149.1425 154.1004 155.4409 161.5209 10 b
# open-ended j indexing (generally a pretty slow operation)
microbenchmark::microbenchmark({foo <- gnatsgo[,1]
foo <- NULL},
{foo <- gnatsgo[,2:3]
foo <- NULL},
times = 5)
# first horizon or second and third horizon, all profiles
# Unit: seconds
# expr min lq mean median uq max neval cld
# { foo <- gnatsgo[, 1] foo <- NULL } 14.97619 15.46312 15.78757 16.09652 16.13384 16.26816 5 a
# { foo <- gnatsgo[, 2:3] foo <- NULL } 18.65325 18.68035 18.85315 18.83340 19.03189 19.06688 5 b
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment