Skip to content

Instantly share code, notes, and snippets.

@cboettig
Last active December 16, 2022 06:01
Show Gist options
  • Save cboettig/2ba4bd6a2ee728e5ef6e072b3d7ec146 to your computer and use it in GitHub Desktop.
Save cboettig/2ba4bd6a2ee728e5ef6e072b3d7ec146 to your computer and use it in GitHub Desktop.
library(duckdb)
library(DBI)
library(glue)
library(dplyr)
# Benchmark DuckDB (via the httpfs extension) reading the phenology forecast
# parquet files from a custom S3 endpoint: time creating the view, building
# the lazy dplyr table, and printing its first rows.
conn <- DBI::dbConnect(
  duckdb(),
  ":memory:",
  config = list(
    "memory_limit" = "12GB",
    "temp_directory" = "/tmp"
  )
)

# httpfs is the DuckDB extension that resolves s3:// URLs.
DBI::dbExecute(conn, "INSTALL 'httpfs';")
DBI::dbExecute(conn, "LOAD 'httpfs';")

# Point DuckDB at the custom endpoint and use path-style URLs.
endpoint <- "data.ecoforecast.org"
DBI::dbExecute(conn, glue("SET s3_endpoint='{endpoint}';"))
DBI::dbExecute(conn, glue("SET s3_url_style='path';"))

tblname <- "phenology"
parquet <- "s3://neon4cast-forecasts/parquet/phenology/*/*/*/*.parquet"
# Alternative that enables hive partitioning in parquet_scan:
#view_query <-glue("CREATE VIEW '{tblname}' ", "AS SELECT * FROM parquet_scan('{parquet}', HIVE_PARTITIONING=true);")
view_query <- glue(
  "CREATE VIEW '{tblname}' ",
  "AS SELECT * FROM parquet_scan('{parquet}');"
)

# CREATE VIEW returns no rows, so run it with dbExecute(); dbSendQuery()
# would leave an open result set that is never cleared.
bench::bench_time(DBI::dbExecute(conn, view_query))
bench::bench_time(haha <- tbl(conn, tblname))
bench::bench_time(print(haha))

# Release the in-memory database now that the timings are done. The lazy
# table `haha` cannot be queried after this point.
DBI::dbDisconnect(conn, shutdown = TRUE)
# Same bucket opened through arrow: time creating the S3 filesystem handle,
# then time opening the dataset without unifying schemas across files.
library(arrow)

bench::bench_time(
  s3 <- s3_bucket(
    "neon4cast-forecasts/parquet/phenology",
    endpoint_override = endpoint
  )
)

bench::bench_time(
  df <- open_dataset(s3, unify_schemas = FALSE)
)
library(duckdb)
library(DBI)
library(glue)
library(dplyr)
# Count GBIF occurrence records per 0.1-degree longitude/latitude cell with
# DuckDB reading the parquet snapshot over S3, then plot log(count) as a
# stars raster.
conn <- DBI::dbConnect(
  duckdb(),
  ":memory:",
  config = list(
    "memory_limit" = "12GB",
    "temp_directory" = "/tmp"
  )
)

# httpfs is the DuckDB extension that resolves s3:// URLs.
DBI::dbExecute(conn, "INSTALL 'httpfs';")
DBI::dbExecute(conn, "LOAD 'httpfs';")
DBI::dbExecute(conn, glue("SET s3_url_style='path';"))

tblname <- "gbif"
bucket <- "s3://gbif-open-data-us-east-1/"
parquet <- glue(bucket, "occurrence/2022-12-01/occurrence.parquet/*")
view_query <- glue(
  "CREATE VIEW '{tblname}' ",
  "AS SELECT * FROM parquet_scan('{parquet}');"
)

# CREATE VIEW returns no rows, so run it with dbExecute(); dbSendQuery()
# would leave an open result set that is never cleared.
DBI::dbExecute(conn, view_query)
gbif <- tbl(conn, tblname)

# The rounding and counting happen before collect(), i.e. on the lazy
# database table; only the per-cell counts are brought into R.
df <- gbif |>
  mutate(
    latitude = round(decimallatitude, 1),
    longitude = round(decimallongitude, 1)
  ) |>
  count(longitude, latitude) |>
  collect() |>
  mutate(n = log(n)) |> # log scale for plotting
  na.omit() # drop cells whose coordinates are missing

# `df` is now a local data frame, so the database can be released. The lazy
# table `gbif` cannot be queried after this point.
DBI::dbDisconnect(conn, shutdown = TRUE)

library(stars)
r <- stars::st_as_stars(df, coords = c("longitude", "latitude"), crs = 4326)
plot(r)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment