Skip to content

Instantly share code, notes, and snippets.

@lpinner
Last active June 12, 2022 13:11
Show Gist options
  • Save lpinner/f89816a318004b7dd73abc443534e84f to your computer and use it in GitHub Desktop.
Save lpinner/f89816a318004b7dd73abc443534e84f to your computer and use it in GitHub Desktop.
# dask geopandas rasterstats distributed demo
from dask.dataframe import from_delayed
from dask.distributed import Client, LocalCluster, as_completed
import dask_geopandas as dgpd
import geopandas as gpd
import pandas as pd
import scipy.stats
from rasterstats import zonal_stats
def kurtosis(arr):
try:
k = scipy.stats.kurtosis(arr.compressed())
except AttributeError: # Not a masked array - 'numpy.ndarray' object has no attribute 'compressed'
k = scipy.stats.kurtosis(arr)
return k
def zonal_statistics(row, *args, **kwargs):
gdf = row.compute()
return gdf.join(pd.DataFrame(zonal_stats(gdf, *args, **kwargs), index=gdf.index))
if __name__ == "__main__":
zones = "zones2.shp"
values = "values2.tif"
stats = ["mean", "std"]
add_stats = {"kurtosis": kurtosis}
cluster = LocalCluster()
client = Client(cluster)
gdf = gpd.read_file(zones)
ddf = dgpd.from_geopandas(gdf, npartitions=4)
# we need to tell dask_geopandas what the output should look like
meta = ddf._meta.join(pd.DataFrame(columns=stats+list(add_stats.keys())))
futures = [
client.submit(zonal_statistics, p, raster=values, stats=stats, add_stats=add_stats, all_touched=True)
for p in ddf.partitions
]
completed = []
for future in as_completed(futures):
try:
data = future.result()
completed.append(future)
except Exception as exc:
pass
results = from_delayed(completed, meta=meta, verify_meta=False).compute()
# dask geopandas rasterstats map_partitions demo
import dask_geopandas as dgpd
import geopandas as gpd
import pandas as pd
import scipy.stats
from rasterstats import zonal_stats
def kurtosis(arr):
try:
k = scipy.stats.kurtosis(arr.compressed())
except AttributeError: # Not a masked array - 'numpy.ndarray' object has no attribute 'compressed'
k = scipy.stats.kurtosis(arr)
return k
def zonal_statistics(row, *args, **kwargs):
return row.join(pd.DataFrame(zonal_stats(gdf, *args, **kwargs), index=gdf.index))
if __name__ == "__main__":
zones = "zones.shp"
values = "values.tif"
# pre-defined stats, see the full list at
# https://pythonhosted.org/rasterstats/manual.html#zonal-statistics
stats = ["mean", "std"]
# user-defined stats
# https://pythonhosted.org/rasterstats/manual.html#user-defined-statistics
add_stats = {"kurtosis": kurtosis}
gdf = gpd.read_file(zones)
ddf = dgpd.from_geopandas(gdf, npartitions=4)
# we need to tell dask_geopandas what the output should look like
meta = ddf._meta.join(pd.DataFrame(columns=stats+list(add_stats.keys())))
res = ddf.map_partitions(zonal_statistics, meta=meta, raster=values, stats=stats, add_stats=add_stats, all_touched=True)
results = res.compute()
print(gdf.head())
print(results.head())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment