Skip to content

Instantly share code, notes, and snippets.

@robbibt
Last active August 14, 2023 13:10
Show Gist options
  • Save robbibt/7b9def1d47f5eeb34d02d6bb67a315ee to your computer and use it in GitHub Desktop.
Save robbibt/7b9def1d47f5eeb34d02d6bb67a315ee to your computer and use it in GitHub Desktop.
`pc_load`: easily searching and loading data from Microsoft Planetary Computer
import numpy as np
import xarray as xr
def load_ls_s2(
x=None,
y=None,
geom=None,
start_date="2020",
end_date="2021",
resolution=30,
cloud_cover=10,
):
"""
Load NDWI from Landsat and Sentinel-2 hosted on Microsoft Planetary
Computer.
Parameters
----------
x, y : tuple, optional
Tuples defining the x and y bounding box to load, in WGS 84.
geom : datacube Geometry, optional
A datacube geometry object representing the spatial extents to
load data for. If provided, `x` and `y` will be ignored.
start_date, end_date : strings, optional
The start and end of the time period to load, expressed as
strings (e.g. "2020", "2021"; "2020-01", "2021-02")
resolution : int, optional
Spatial resolution to load data in. Defaults to 30 metres.
cloud cover : int, optional
The maximum threshold of cloud cover to load. Defaults to 10%.
Returns
-------
satellite_ds : xarray.Dataset
The loaded dataset as an `xarray.Dataset`, containing a single
"ndwi" `xarray.DataArray`.
"""
query_params = dict(
time=(start_date, end_date),
geom=geom if geom is not None else None,
x=x if geom is None else None,
y=y if geom is None else None,
)
load_params = dict(
crs="utm",
resolution=resolution,
chunks={"x": 3000, "y": 3000},
groupby="solar_day",
resampling={"qa_pixel": "nearest", "SCL": "nearest", "*": "cubic"},
)
# Load Landsat
ds_ls, items_ls = pc_load(
product="landsat-c2-l2",
bands=("green", "nir08", "qa_pixel"),
stac_query={
"eo:cloud_cover": {"lt": cloud_cover},
"platform": {"in": ["landsat-5", "landsat-8", "landsat-9"]},
"landsat:collection_category": {"in": ["T1"]},
},
**query_params,
**load_params,
)
# Load Sentinel-2
ds_s2, items_s2 = pc_load(
product="sentinel-2-l2a",
bands=("green", "nir", "SCL"),
stac_query={
"eo:cloud_cover": {"lt": cloud_cover},
},
**query_params,
**load_params,
)
# Apply Landsat cloud mask
cloud_mask = (
# Bit 3: high confidence cloud, bit 4: high confidence shadow
# https://medium.com/analytics-vidhya/python-for-geosciences-
# raster-bit-masks-explained-step-by-step-8620ed27141e
np.bitwise_and(ds_ls.qa_pixel, 1 << 3)
| np.bitwise_and(ds_ls.qa_pixel, 1 << 4)
) == 0
ds_ls = ds_ls.where(cloud_mask).drop("qa_pixel")
# Apply Sentinel-2 cloud mask
# 1: defective, 3: shadow, 9: high confidence cloud
cloud_mask = ~ds_s2.SCL.isin([1, 3, 9])
ds_s2 = ds_s2.where(cloud_mask).drop("SCL")
# Apply scaling
ds_ls = (ds_ls.where(ds_ls != 0) * 0.0000275 + -0.2).clip(0, 1)
ds_s2 = (ds_s2.where(ds_s2 != 0) * 0.0001).clip(0, 1)
# Convert to NDWI
ndwi_ls = (ds_ls.green - ds_ls.nir08) / (ds_ls.green + ds_ls.nir08)
ndwi_s2 = (ds_s2.green - ds_s2.nir) / (ds_s2.green + ds_s2.nir)
# Combine into a single dataset
satellite_ds = (
xr.concat([ndwi_ls, ndwi_s2], dim="time").sortby("time").to_dataset(name="ndwi")
)
return satellite_ds
# Example load
load_ls_s2(
x=(54.93, 55.05),
y=(25.04, 24.96),
start_date="2020",
end_date="2021",
cloud_cover=10,
)
def pc_load(
product,
time=None,
x=None,
y=None,
geom=None,
stac_query=None,
url="https://planetarycomputer.microsoft.com/api/stac/v1",
**load_params,
):
"""
Loads data from Microsoft Planetary Computer into an
`xarray.Dataset` using `odc-stac`.
Parameters
----------
product : str
The name of the product to load.
time : tuple, optional
The time range to load data for as a tuple of strings (e.g.
`("2020", "2021")`. If not provided, data will be loaded for
all available timesteps.
x, y : tuple, optional
Tuples defining the x and y bounding box to load, in WGS 84.
geom : datacube Geometry, optional
A datacube geometry object representing the spatial extents to
load data for. If provided, `x` and `y` will be ignored.
stac_query : dict, optional
A query dictionary to further filter the data using STAC metadata.
If not provided, no additional filtering will be applied.
url : str, optional
The URL of the Planetary Computer STAC API.
Defaults to "https://planetarycomputer.microsoft.com/api/stac/v1".
**load_params : dict
Additional parameters to be passed to `odc.stac.load()`.
Returns
-------
ds : xarray.Dataset
The loaded dataset as an `xarray.Dataset`.
items : pystac.item_collection.ItemCollection
STAC items returned by `pystac_client`.
"""
import odc.stac
import planetary_computer
import pystac_client
from odc.geo.geom import BoundingBox
# Connect to client
catalog = pystac_client.Client.open(
url,
modifier=planetary_computer.sign_inplace
if "planetarycomputer" in url
else None,
)
# Set up query
if geom is not None:
bbox = geom.boundingbox
else:
bbox = BoundingBox.from_xy(x, y)
time_range = "/".join(time) if time is not None else None
search = catalog.search(
collections=product,
bbox=bbox,
datetime=time_range,
query=stac_query if stac_query is not None else None,
)
# Check how many items were returned
items = search.item_collection()
print(f"Found {len(items)} STAC items for {product}")
# Load with ODC STAC
ds = odc.stac.load(
items=items,
bbox=bbox,
**load_params,
)
return ds, items
# Search and load data
ds, items = pc_load(
product="landsat-c2-l2",
bands=("red", "green", "blue"),
time=("2023-06", "2023-06"),
x=(54.93, 55.05),
y=(25.04, 24.96),
stac_query={"eo:cloud_cover": {"lt": 25}},
crs="utm", # Will load data in the local UTM zone CRS
resolution=30,
groupby="solar_day",
)
# Rescale using USGS C2 L2 scaling factors and plot
ds_rescaled = (ds.where(ds != 0) * 0.0000275 + -0.2).clip(0, 1)
ds_rescaled.to_array().plot.imshow(col="time", robust=True)
@robbibt
Copy link
Author

robbibt commented Jul 13, 2023

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment