notebook: pangeo/pytorch-notebook
We need the NVIDIA dev requirements if using the default containers (e.g. pangeo/pytorch-notebook):
# Gurobi manual installation steps (run inside the container).
# Be sure to put a copy of the license file in "$HOME/gurobi.lic" for this to work!!
# Download the release (or the path to the most recent linux64 download from
# https://www.gurobi.com/downloads/gurobi-software/)
wget https://packages.gurobi.com/11.0/gurobi11.0.1_linux64.tar.gz
# Extract the tarball
tar -xvf gurobi*_linux64.tar.gz
# Set environmental variables
# Setup for streaming NASA Earthdata rasters with xarray/rioxarray.
# (Original lines carried "| |" table-extraction residue, which is invalid
# Python syntax; removed here.)
import xarray as xr
import rasterio
import rioxarray
import earthaccess
import os
from timebudget import timebudget
from pathlib import Path

# assumes we have a ~/.netrc created
# NOTE(review): ~/.urs_cookies is the conventional URS cookie jar used by
# NASA Earthdata login flows — confirm downstream code actually reads it.
cookies = os.path.expanduser("~/.urs_cookies")
## Using Forecasts data, ~ 300+ GB in many very small partitions!
# Compare dataset-open times for duckdbfs vs arrow against the same
# anonymous S3 URI. ("| |" extraction residue removed — it is a parse
# error in R.)
uri <- "s3://anonymous@bio230014-bucket01/neon4cast-forecasts/parquet/aquatics?endpoint_override=sdsc.osn.xsede.org"
bench::bench_time(df <- duckdbfs::open_dataset(uri))
# process real
# 1.23m 4.91m
bench::bench_time(df <- arrow::open_dataset(uri))
# process real
# 4.21m 32.46m
## examples
# Read a protected HLS Landsat tile over HTTP via GDAL's vsicurl driver.
# requires GDAL >= 3.6
# edl_set_token() configures GDAL with an Earthdata Login token so the
# protected URL below is readable (helper defined elsewhere).
edl_set_token()
library(stars)
url <- "https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T56JKT.2023246T235950.v2.0/HLS.L30.T56JKT.2023246T235950.v2.0.SAA.tif"
x <- read_stars(paste0("/vsicurl/", url))
plot(x)
# Note: this does not compare favorably to s3$ls() | |
list_directory_tree <- function(directory_path, recursive = TRUE) { | |
paths <- character(0) | |
meta <- jsonlite::read_json(directory_path) | |
entries <- list_paths(meta, directory_path) | |
paths <- c(paths, entries) | |
if (recursive && length(entries) > 0) { | |
for (subdir in meta$prefixes) { |
notebook: pangeo/pytorch-notebook
We need the NVIDIA dev requirements if using the default containers (e.g. pangeo/pytorch-notebook):
# Access a cloud-hosted GeoTIFF over HTTP via GDAL's vsicurl driver,
# plus spatial packages for cropping to a country polygon.
# ("| |" extraction residue removed — it is a parse error in R.)
url <- "/vsicurl/https://minio.carlboettiger.info/public-biodiversity/carbon/Irrecoverable_Carbon_2010/Irrecoverable_C_Total_2010.tif"
library(gdalcubes)
library(spData)
library(sf)
library(stars)
# Country polygon to use as a spatial subset
india <- spData::world |> dplyr::filter(name_long == "India")
# List the tern4cast S3 inventory and load it as a data frame.
# ("| |" extraction residue removed — it is a parse error in R.)
library(duckdbfs)
library(dplyr)
# NOTE(review): installing duckdbfs *after* library(duckdbfs) only takes
# effect on the next session — consider moving this line first.
remotes::install_github("cboettig/duckdbfs")
library(arrow)
s3 <- s3_bucket("tern4cast-inventory", endpoint_override = "data.ecoforecast.org")
s3$ls("tern4cast-forecasts")
df <- open_dataset(s3$path("tern4cast-forecasts")) |> collect()
# Inventory listing has no header row; name the columns explicitly.
names(df) <- c("parquet", "theme", "model_id", "reference_datetime", "date", "part")
# Time opening the neon4cast scores dataset directly from S3.
# ("| |" extraction residue removed — it is a parse error in R.)
library(arrow)
s3 <- s3_bucket("neon4cast-scores/parquet/aquatics", endpoint_override = "data.ecoforecast.org", anonymous = TRUE)
bench::bench_time( # very slow
  ds <- open_dataset(s3)
)
bench::bench_time( # very slow, but available via S3 Inventory |
# STAC search setup: build a bounding box for the area of interest.
# ("| |" extraction residue removed — it is a parse error in R.)
library(rstac)
library(gdalcubes)
# let's pick some spatial extent, say, a bbox around "India"
loc <- spData::world |> dplyr::filter(name_long == "India")
box <- sf::st_bbox(loc)
matches <- | |
stac("https://planetarycomputer.microsoft.com/api/stac/v1") |> | |
stac_search(collections = "io-biodiversity", |