Skip to content

Instantly share code, notes, and snippets.

@scottstanie
Created October 9, 2023 17:17
Show Gist options
  • Save scottstanie/12ad15a28212c028b138b690bc79bcf9 to your computer and use it in GitHub Desktop.
Save scottstanie/12ad15a28212c028b138b690bc79bcf9 to your computer and use it in GitHub Desktop.
Open multiple displacement datasets as an `xarray` stack
import datetime
import re
from pathlib import Path
from typing import Union
import pandas as pd
import xarray as xr
Filename = Union[str, Path]
def open_stack(
filenames: list[Filename],
rechunk_by_time: bool = True,
):
"""Open a stack of files as a single xarray dataset.
Parameters
----------
filenames : list[Filename]
List of filenames to open.
rechunk_by_time : bool, default=True
Whether to rechunk the dataset so that all times are in a single chunk.
Returns
-------
xr.Dataset
The dataset containing all files.
"""
ds = xr.open_mfdataset(
filenames,
preprocess=_prep,
engine="rasterio",
)
if rechunk_by_time:
cur_chunks = ds.chunks
cur_chunks["time"] = (len(ds.time),)
ds = ds.chunk(cur_chunks)
return ds
def _prep(ds):
fname = ds.encoding["source"]
date = get_dates(fname)[0]
if len(ds.band) == 1:
ds = ds.sel(band=ds.band[0])
return ds.expand_dims(time=[pd.to_datetime(date)])
def get_dates(filename: Filename, fmt: str = "%Y%m%d") -> list[datetime.date]:
"""Search for dates in the stem of `filename` matching `fmt`.
Excludes dates that are not in the stem of `filename` (in the directories).
""" # noqa: E501
pattern = re.compile(r"\d{8}")
date_list = re.findall(pattern, Path(filename).stem)
if not date_list:
return []
return [_parse_date(d, fmt) for d in date_list]
def _parse_date(datestr: str, fmt: str = "%Y%m%d") -> datetime.date:
return datetime.datetime.strptime(datestr, fmt).date()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment