Last active
August 14, 2023 13:10
-
-
Save robbibt/7b9def1d47f5eeb34d02d6bb67a315ee to your computer and use it in GitHub Desktop.
`pc_load`: easily searching and loading data from Microsoft Planetary Computer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import xarray as xr | |
def load_ls_s2( | |
x=None, | |
y=None, | |
geom=None, | |
start_date="2020", | |
end_date="2021", | |
resolution=30, | |
cloud_cover=10, | |
): | |
""" | |
Load NDWI from Landsat and Sentinel-2 hosted on Microsoft Planetary | |
Computer. | |
Parameters | |
---------- | |
x, y : tuple, optional | |
Tuples defining the x and y bounding box to load, in WGS 84. | |
geom : datacube Geometry, optional | |
A datacube geometry object representing the spatial extents to | |
load data for. If provided, `x` and `y` will be ignored. | |
start_date, end_date : strings, optional | |
The start and end of the time period to load, expressed as | |
strings (e.g. "2020", "2021"; "2020-01", "2021-02") | |
resolution : int, optional | |
Spatial resolution to load data in. Defaults to 30 metres. | |
cloud cover : int, optional | |
The maximum threshold of cloud cover to load. Defaults to 10%. | |
Returns | |
------- | |
satellite_ds : xarray.Dataset | |
The loaded dataset as an `xarray.Dataset`, containing a single | |
"ndwi" `xarray.DataArray`. | |
""" | |
query_params = dict( | |
time=(start_date, end_date), | |
geom=geom if geom is not None else None, | |
x=x if geom is None else None, | |
y=y if geom is None else None, | |
) | |
load_params = dict( | |
crs="utm", | |
resolution=resolution, | |
chunks={"x": 3000, "y": 3000}, | |
groupby="solar_day", | |
resampling={"qa_pixel": "nearest", "SCL": "nearest", "*": "cubic"}, | |
) | |
# Load Landsat | |
ds_ls, items_ls = pc_load( | |
product="landsat-c2-l2", | |
bands=("green", "nir08", "qa_pixel"), | |
stac_query={ | |
"eo:cloud_cover": {"lt": cloud_cover}, | |
"platform": {"in": ["landsat-5", "landsat-8", "landsat-9"]}, | |
"landsat:collection_category": {"in": ["T1"]}, | |
}, | |
**query_params, | |
**load_params, | |
) | |
# Load Sentinel-2 | |
ds_s2, items_s2 = pc_load( | |
product="sentinel-2-l2a", | |
bands=("green", "nir", "SCL"), | |
stac_query={ | |
"eo:cloud_cover": {"lt": cloud_cover}, | |
}, | |
**query_params, | |
**load_params, | |
) | |
# Apply Landsat cloud mask | |
cloud_mask = ( | |
# Bit 3: high confidence cloud, bit 4: high confidence shadow | |
# https://medium.com/analytics-vidhya/python-for-geosciences- | |
# raster-bit-masks-explained-step-by-step-8620ed27141e | |
np.bitwise_and(ds_ls.qa_pixel, 1 << 3) | |
| np.bitwise_and(ds_ls.qa_pixel, 1 << 4) | |
) == 0 | |
ds_ls = ds_ls.where(cloud_mask).drop("qa_pixel") | |
# Apply Sentinel-2 cloud mask | |
# 1: defective, 3: shadow, 9: high confidence cloud | |
cloud_mask = ~ds_s2.SCL.isin([1, 3, 9]) | |
ds_s2 = ds_s2.where(cloud_mask).drop("SCL") | |
# Apply scaling | |
ds_ls = (ds_ls.where(ds_ls != 0) * 0.0000275 + -0.2).clip(0, 1) | |
ds_s2 = (ds_s2.where(ds_s2 != 0) * 0.0001).clip(0, 1) | |
# Convert to NDWI | |
ndwi_ls = (ds_ls.green - ds_ls.nir08) / (ds_ls.green + ds_ls.nir08) | |
ndwi_s2 = (ds_s2.green - ds_s2.nir) / (ds_s2.green + ds_s2.nir) | |
# Combine into a single dataset | |
satellite_ds = ( | |
xr.concat([ndwi_ls, ndwi_s2], dim="time").sortby("time").to_dataset(name="ndwi") | |
) | |
return satellite_ds | |
# Example load | |
load_ls_s2( | |
x=(54.93, 55.05), | |
y=(25.04, 24.96), | |
start_date="2020", | |
end_date="2021", | |
cloud_cover=10, | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pc_load( | |
product, | |
time=None, | |
x=None, | |
y=None, | |
geom=None, | |
stac_query=None, | |
url="https://planetarycomputer.microsoft.com/api/stac/v1", | |
**load_params, | |
): | |
""" | |
Loads data from Microsoft Planetary Computer into an | |
`xarray.Dataset` using `odc-stac`. | |
Parameters | |
---------- | |
product : str | |
The name of the product to load. | |
time : tuple, optional | |
The time range to load data for as a tuple of strings (e.g. | |
`("2020", "2021")`. If not provided, data will be loaded for | |
all available timesteps. | |
x, y : tuple, optional | |
Tuples defining the x and y bounding box to load, in WGS 84. | |
geom : datacube Geometry, optional | |
A datacube geometry object representing the spatial extents to | |
load data for. If provided, `x` and `y` will be ignored. | |
stac_query : dict, optional | |
A query dictionary to further filter the data using STAC metadata. | |
If not provided, no additional filtering will be applied. | |
url : str, optional | |
The URL of the Planetary Computer STAC API. | |
Defaults to "https://planetarycomputer.microsoft.com/api/stac/v1". | |
**load_params : dict | |
Additional parameters to be passed to `odc.stac.load()`. | |
Returns | |
------- | |
ds : xarray.Dataset | |
The loaded dataset as an `xarray.Dataset`. | |
items : pystac.item_collection.ItemCollection | |
STAC items returned by `pystac_client`. | |
""" | |
import odc.stac | |
import planetary_computer | |
import pystac_client | |
from odc.geo.geom import BoundingBox | |
# Connect to client | |
catalog = pystac_client.Client.open( | |
url, | |
modifier=planetary_computer.sign_inplace | |
if "planetarycomputer" in url | |
else None, | |
) | |
# Set up query | |
if geom is not None: | |
bbox = geom.boundingbox | |
else: | |
bbox = BoundingBox.from_xy(x, y) | |
time_range = "/".join(time) if time is not None else None | |
search = catalog.search( | |
collections=product, | |
bbox=bbox, | |
datetime=time_range, | |
query=stac_query if stac_query is not None else None, | |
) | |
# Check how many items were returned | |
items = search.item_collection() | |
print(f"Found {len(items)} STAC items for {product}") | |
# Load with ODC STAC | |
ds = odc.stac.load( | |
items=items, | |
bbox=bbox, | |
**load_params, | |
) | |
return ds, items | |
# Search and load data | |
ds, items = pc_load( | |
product="landsat-c2-l2", | |
bands=("red", "green", "blue"), | |
time=("2023-06", "2023-06"), | |
x=(54.93, 55.05), | |
y=(25.04, 24.96), | |
stac_query={"eo:cloud_cover": {"lt": 25}}, | |
crs="utm", # Will load data in the local UTM zone CRS | |
resolution=30, | |
groupby="solar_day", | |
) | |
# Rescale using USGS C2 L2 scaling factors and plot | |
ds_rescaled = (ds.where(ds != 0) * 0.0000275 + -0.2).clip(0, 1) | |
ds_rescaled.to_array().plot.imshow(col="time", robust=True) |
Author
robbibt
commented
Jul 13, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment