Skip to content

Instantly share code, notes, and snippets.

@PMassicotte
Created March 28, 2022 11:58
Show Gist options
  • Save PMassicotte/6f6742f79f30d9b2e356e9b0d83a93a6 to your computer and use it in GitHub Desktop.
Save PMassicotte/6f6742f79f30d9b2e356e9b0d83a93a6 to your computer and use it in GitHub Desktop.
# Playing around with tiledb
library(dplyr)
library(tiledb)
# Inputs: every entry found in the source directory, plus the location where
# the TileDB array is created.
files <- fs::dir_ls("~/Desktop/ppz/")
uri <- "~/Desktop/tiledb"
# Uncomment to remove whatever already exists at `uri` before re-running.
# unlink(uri, recursive = TRUE)

# Domain ----
# Two FLOAT64 dimensions, bounded to [-180, 180] and [-90, 90].
lon_dim <- tiledb_dim("longitude", c(-180.0, 180.0), type = "FLOAT64")
lat_dim <- tiledb_dim("latitude", c(-90.0, 90.0), type = "FLOAT64")
dom <- tiledb_domain(dims = c(lon_dim, lat_dim))

# Attributes ----
# A single FLOAT64 attribute whose filter list contains a "ZSTD" filter.
zstd_filter <- tiledb_filter("ZSTD")
attr_filters <- tiledb_filter_list(zstd_filter)
pp_attr <- tiledb_attr(
  "primary_production",
  type = "FLOAT64",
  filter_list = attr_filters
)
all_attrs <- list(primary_production = pp_attr)

# Schema ----
# Sparse array that allows duplicate coordinates.
sc <- tiledb_array_schema(
  domain = dom,
  attrs = all_attrs,
  sparse = TRUE,
  allows_dups = TRUE
)

# Create the (empty) array at `uri` from the schema.
tiledb_array_create(uri, sc)
# Ingest ----
# Open the array at `uri` in WRITE mode and append the contents of up to the
# first 100 input files, one file per write.
db <- tiledb_array(uri, query_type = "WRITE", as.data.frame = TRUE)

# head() instead of files[1:100]: indexing past the end of `files` pads the
# result with NA, and fread(NA) would abort the ingest whenever fewer than
# 100 files are available. head() simply returns what is there.
for (path in head(files, 100)) {
  # print(path)
  chunk <- data.table::fread(path)

  # Skip files that produced no rows; there is nothing to write for them.
  if (nrow(chunk) >= 1) {
    db[] <- chunk
  }
}

tiledb_array_close(db)
# Reopen the array at `uri`, this time in READ mode. as.data.frame = TRUE
# presumably makes `db[]` return a data.frame -- confirm in the tiledb docs.
db <- tiledb_array(uri, query_type = "READ", as.data.frame = TRUE)
# Very small footprint: object.size() measures the `db` R object itself, not
# the data stored at `uri`.
object.size(db)
# Is it ok to query like this?
# Option 1: read with `db[]`, then keep rows with longitude > 1 using filter()
# (dplyr is attached at the top of the script). The result is auto-printed.
# NOTE(review): `db[]` presumably materialises the whole array in memory
# before filter() runs; tiledb's selected_ranges / query_condition may allow
# the restriction to be applied during the read instead -- TODO confirm.
db[] |>
filter(longitude > 1)
# Or like this?
# Option 2: the same row selection written with base R subset() on the result
# of `db[]`. NOTE(review): likely the same full read as option 1 -- confirm.
subset(db[], longitude > 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment