Skip to content

Instantly share code, notes, and snippets.

View martindurant's full-sized avatar

Martin Durant martindurant

View GitHub Profile
@martindurant
martindurant / ex1.ipynb
Created July 19, 2023 17:32
Intake 2 examples
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@martindurant
martindurant / intake-next.ipynb
Last active June 8, 2023 16:47
What the rewrite is for
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@martindurant
martindurant / big.ipynb
Created May 14, 2023 16:45
parquet merger
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@martindurant
martindurant / find_columns.py
Last active May 1, 2023 20:52
Dataframe column optimization
from dask.layers import DataFrameIOLayer
import pandas as pd
def find_columns(df):
io_layers = {k: lay for k, lay in df.dask.layers.items() if isinstance(lay, DataFrameIOLayer)}
required = {k: set() for k in io_layers}
for k, io_lay in io_layers.items():
allcols = io_lay.collection_annotations["series_dtypes"]
import astropy.io.fits._tiled_compression as tiled
from astropy.io import fits
import numcodecs
import fsspec
import zarr
import json
import numpy as np
class MY_RICE(tiled.codecs.Rice1):
@martindurant
martindurant / tar_kerchunk.py
Created October 31, 2022 18:26
Single file kerchunking
import kerchunk.hdf
import kerchunk.combine
import fsspec
import tarfile
import xarray as xr
ro = dict(
s3={
"anon": True,
@martindurant
martindurant / icechunk.ipynb
Created September 23, 2022 15:27
icechunk1
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@martindurant
martindurant / bigtar.py
Last active September 20, 2022 17:14
Single file datasets
import kerchunk.hdf
import kerchunk.combine
import fsspec
import tarfile
import xarray as xr
ro = dict(
s3={
"anon": True,
@martindurant
martindurant / env
Created May 27, 2021 20:54
test env
name: pangeo-forge-recipes
channels:
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=1_gnu
- affine=2.3.0=py_0
- aiohttp=3.7.4=py38h497a2fe_0
- appdirs=1.4.4=pyh9f0ad1d_0
@martindurant
martindurant / matanzas2.py
Created May 21, 2021 19:05
build references for second matanzas dataset
url = "s3://prod-is-usgs-sb-prod-publish/609bf69ed34ea221ce39b261/breach_matanzas.nc"
from fsspec_reference_maker.hdf import *
so = dict(
mode='rb', anon=True, requester_pays=True,
default_fill_cache=False, default_cache_type='first'
)
with fsspec.open(url, **so) as f:
h5chunks = SingleHdf5ToZarr(f, url, xarray=True, inline_threshold=100)
out = h5chunks.translate()