Skip to content

Instantly share code, notes, and snippets.

@scottstanie
Last active February 6, 2025 01:57
Show Gist options
  • Select an option

  • Save scottstanie/ac00b77b1d5e6b9dcaa904a9d14c8fc3 to your computer and use it in GitHub Desktop.

Select an option

Save scottstanie/ac00b77b1d5e6b9dcaa904a9d14c8fc3 to your computer and use it in GitHub Desktop.
Export dolphin, or DISP, time series rasters to a GeoParquet format ingestible by QGIS
#!/usr/bin/env python3
# /// script
# dependencies = ["geopandas", "rioxarray", "tyro", "opera_utils"]
# ///
"""Create a GeoParquet file from multi-date InSAR displacement GeoTIFFs.
Optionally mask by similarity and coherence thresholds.
Example CLI usage (with tyro):
pipx run export_to_qgis.py \
--timeseries-pattern "timeseries/*.tif" \
--similarity-path "interferograms/similarity.tif" \
--similarity-threshold 0.5 \
--temporal-coherence-path "interferograms/temporal_coherence.tif" \
--temporal-coherence-threshold 0.6 \
--output-file "output_displacement.parquet"
"""
from __future__ import annotations
from typing import Literal
from pathlib import Path
import geopandas as gpd
import rioxarray
import tyro
import xarray as xr
from opera_utils import get_dates
def _preprocess_band(ds: xr.DataArray) -> xr.DataArray:
"""Expand a single-band raster to have a 'time' dimension derived from the filename.
Converts the DataArray from shape (y, x) -> (time, y, x).
Parameters
----------
ds : xr.DataArray
A 2D or 3D data array from rioxarray.open_rasterio.
fname : Path
The corresponding GeoTIFF filename.
Returns
-------
xr.DataArray
DataArray with an added 'time' dimension of size 1.
"""
fname = ds.encoding["source"]
# If multiple bands, just pick the first or consider additional logic.
if "band" in ds.coords and ds.coords["band"].size > 1:
ds = ds.isel(band=0)
# Use the secondary date as time dimension
# TODO: Ensure it's a single-reference timeseries?
date = get_dates(fname)[1]
return ds.expand_dims({"time": [date]})
def create_geoparquet(
timeseries_pattern: str,
output_file: Path,
similarity: Path | None = None,
similarity_threshold: float = 0.5,
temporal_coherence: Path | None = None,
temporal_coherence_threshold: float = 0.6,
coord_dtype: str = "float32",
units: Literal["meters", "millimeters"] = "millimeters",
) -> Path:
"""Combine multiple single-date displacement GeoTIFFs into vector format.
Optionally mask by similarity and temporal coherence rasters.
Parameters
----------
timeseries_pattern : str
Glob pattern for input time-series GeoTIFFs, e.g. "timeseries/*.tif".
output_file : Path
Output file path (Parquet format). E.g., "output.parquet"
similarity : [Path], default=None
Path to similarity raster (e.g., "interferograms/similarity.tif").
If provided, pixels below 'similarity_threshold' will be masked out.
similarity_threshold : float, default=0.5
Similarity threshold (0.0-1.0). Only used if similarity is provided.
temporal_coherence : [Path], default=None
Path to temporal coherence raster (e.g., "interferograms/temporal_coherence.tif")
If provided, pixels below 'temporal_coherence_threshold' will be masked out.
temporal_coherence_threshold : float, default=0.6
Temporal coherence threshold (0.0-1.0).
Only used if temporal_coherence is provided.
coord_dtype : str, default="float32"
Data type to use for the x and y coordinates.
If your raster is in EPSG:4326, x=longitude, y=latitude.
Otherwise it could be projected coords (UTM, etc.) with int coordinates.
units : Literal["meters", "millimeters"], default="millimeters"
Units of the displacement values.
If "millimeters", the displacement values will be multiplied by 1000.
Returns
-------
Path
The path to the written GeoParquet file.
"""
# Gather all timeseries tifs
tif_files = sorted(Path().glob(timeseries_pattern))
if not tif_files:
raise FileNotFoundError(
f"No files found matching pattern: {timeseries_pattern}"
)
ds = (
xr.open_mfdataset(tif_files, preprocess=_preprocess_band, parallel=True)
.rename({"band_data": "displacement"})
.drop_vars(["band"])
)
index = ["x", "y", "geometry"]
# Apply optional similarity mask
if similarity:
sim_da = rioxarray.open_rasterio(similarity, masked=True).squeeze(drop=True)
# Add similarity to the dataset
ds["similarity"] = sim_da
ds = ds.where(sim_da > similarity_threshold)
index.append("similarity")
# Apply optional temporal coherence mask
if temporal_coherence:
coh_da = rioxarray.open_rasterio(temporal_coherence, masked=True).squeeze(
drop=True
)
# Add temporal coherence to the dataset
ds["temporal_coherence"] = coh_da
ds = ds.where(coh_da > temporal_coherence_threshold)
index.append("temporal_coherence")
# Ensure we have a valid CRS in ds. If not set, you can do ds.rio.write_crs("EPSG:4326", inplace=True)
crs = ds.rio.crs
if not crs:
raise ValueError(
"No CRS found in the input rasters. Please ensure GeoTIFFs have a valid"
" projection."
)
# Flatten the time-series to a table: (time, y, x, displacement)
# Convert from xarray to a standard DataFrame
df = ds.to_dataframe().dropna().reset_index()
# TODO: For UTM, do we want this as integers?
# df.x = df.x.astype(int)
# df.y = df.y.astype(int)
df.drop(columns=["spatial_ref", "band"], inplace=True, errors="ignore")
if units == "millimeters":
df["displacement"] *= 1000
# Convert from (x, y) pixel coords to geometry
# If your raster is in EPSG:4326, x=longitude, y=latitude.
# Otherwise it could be projected coords (UTM, etc.)
# We rely on the existing coordinate definitions from rioxarray.
# They should be in 'x' and 'y'.
geometry = gpd.points_from_xy(x=df["x"], y=df["y"], crs=crs)
# Create a GeoDataFrame with the geometry
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs=crs)
gdf.x = gdf.x.astype(coord_dtype)
gdf.y = gdf.y.astype(coord_dtype)
# Convert time column to the required format (DYYYYMMDD)
gdf.loc[:, "date_col"] = "D" + gdf["time"].dt.strftime("%Y%m%d")
# Pivot the dataframe
wide_gdf = gdf.pivot(
index=index,
columns="date_col",
values="displacement",
)
# Calculate velocity (TODO: this is the simple last - first method)
wide_gdf.loc[:, "velocity"] = (
gdf.groupby(["x", "y"])["displacement"].apply(
lambda x: (x.iloc[-1] - x.iloc[0]) / (len(x) - 1)
)
* 365.25
)
# Reset the index to make x, y, and geometry regular columns again
wide_gdf = gpd.GeoDataFrame(wide_gdf.reset_index(), geometry="geometry")
# Write to GeoParquet
wide_gdf.to_parquet(output_file, index=False)
print(f"GeoParquet written to: {output_file}")
return output_file
def main():
"""Command-line interface for create_geoparquet using tyro."""
tyro.cli(create_geoparquet)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment