notebook: pangeo/pytorch-notebook
We need the NVIDIA dev requirements if using the default containers (e.g. pangeo/pytorch-notebook):
# Gurobi manual installation steps (run inside the container).
# Be sure to put a copy of the license file in "$HOME/gurobi.lic" for this to work!!
# Download the release (or the path to the most recent linux64 download from
# https://www.gurobi.com/downloads/gurobi-software/)
wget https://packages.gurobi.com/11.0/gurobi11.0.1_linux64.tar.gz
# Extract the tarball
tar -xvf gurobi*_linux64.tar.gz
# Set environmental variables
# Setup for streaming NASA Earthdata rasters with xarray/rioxarray.
# (Original lines carried "| |" table-extraction residue, which is invalid
# Python syntax; removed here.)
import xarray as xr
import rasterio
import rioxarray
import earthaccess
import os
from timebudget import timebudget
from pathlib import Path

# assumes we have a ~/.netrc created
# NOTE(review): ~/.urs_cookies is the conventional URS cookie jar used by
# NASA Earthdata login flows — confirm downstream code actually reads it.
cookies = os.path.expanduser("~/.urs_cookies")
## Using Forecasts data, ~ 300+ GB in many very small partitions!
# Compare dataset-open times for duckdbfs vs arrow against the same
# anonymous S3 URI. ("| |" extraction residue removed — it is a parse
# error in R.)
uri <- "s3://anonymous@bio230014-bucket01/neon4cast-forecasts/parquet/aquatics?endpoint_override=sdsc.osn.xsede.org"
bench::bench_time(df <- duckdbfs::open_dataset(uri))
# process real
# 1.23m 4.91m
bench::bench_time(df <- arrow::open_dataset(uri))
# process real
# 4.21m 32.46m
## examples
# Read a protected HLS Landsat tile over HTTP via GDAL's vsicurl driver.
# requires GDAL >= 3.6
# edl_set_token() configures GDAL with an Earthdata Login token so the
# protected URL below is readable (helper defined elsewhere).
edl_set_token()
library(stars)
url <- "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T56JKT.2023246T235950.v2.0/HLS.L30.T56JKT.2023246T235950.v2.0.SAA.tif"
x <- read_stars(paste0("/vsicurl/", url))
plot(x)
# Note: this does not compare favorably to s3$ls() | |
list_directory_tree <- function(directory_path, recursive = TRUE) { | |
paths <- character(0) | |
meta <- jsonlite::read_json(directory_path) | |
entries <- list_paths(meta, directory_path) | |
paths <- c(paths, entries) | |
if (recursive && length(entries) > 0) { | |
for (subdir in meta$prefixes) { |
notebook: pangeo/pytorch-notebook
We need the NVIDIA dev requirements if using the default containers (e.g. pangeo/pytorch-notebook):
# Access a cloud-hosted GeoTIFF over HTTP via GDAL's vsicurl driver,
# plus spatial packages for cropping to a country polygon.
# ("| |" extraction residue removed — it is a parse error in R.)
url <- "/vsicurl/https://minio.carlboettiger.info/public-biodiversity/carbon/Irrecoverable_Carbon_2010/Irrecoverable_C_Total_2010.tif"
library(gdalcubes)
library(spData)
library(sf)
library(stars)
# Country polygon to use as a spatial subset
india <- spData::world |> dplyr::filter(name_long == "India")
# List the tern4cast S3 inventory and load it as a data frame.
# ("| |" extraction residue removed — it is a parse error in R.)
library(duckdbfs)
library(dplyr)
# NOTE(review): installing duckdbfs *after* library(duckdbfs) only takes
# effect on the next session — consider moving this line first.
remotes::install_github("cboettig/duckdbfs")
library(arrow)
s3 <- s3_bucket("tern4cast-inventory", endpoint_override = "data.ecoforecast.org")
s3$ls("tern4cast-forecasts")
df <- open_dataset(s3$path("tern4cast-forecasts")) |> collect()
# Inventory listing has no header row; name the columns explicitly.
names(df) <- c("parquet", "theme", "model_id", "reference_datetime", "date", "part")
# Time opening the neon4cast scores dataset directly from S3.
# ("| |" extraction residue removed — it is a parse error in R.)
library(arrow)
s3 <- s3_bucket("neon4cast-scores/parquet/aquatics", endpoint_override = "data.ecoforecast.org", anonymous = TRUE)
bench::bench_time( # very slow
  ds <- open_dataset(s3)
)
bench::bench_time( # very slow, but available via S3 Inventory |
# STAC search setup: build a bounding box for the area of interest.
# ("| |" extraction residue removed — it is a parse error in R.)
library(rstac)
library(gdalcubes)
# let's pick some spatial extent, say, a bbox around "India"
loc <- spData::world |> dplyr::filter(name_long == "India")
box <- sf::st_bbox(loc)
matches <- | |
stac("https://planetarycomputer.microsoft.com/api/stac/v1") |> | |
stac_search(collections = "io-biodiversity", |