Last active
June 12, 2022 13:11
-
-
Save lpinner/f89816a318004b7dd73abc443534e84f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# dask geopandas rasterstats distributed demo | |
from dask.dataframe import from_delayed | |
from dask.distributed import Client, LocalCluster, as_completed | |
import dask_geopandas as dgpd | |
import geopandas as gpd | |
import pandas as pd | |
import scipy.stats | |
from rasterstats import zonal_stats | |
def kurtosis(arr): | |
try: | |
k = scipy.stats.kurtosis(arr.compressed()) | |
except AttributeError: # Not a masked array - 'numpy.ndarray' object has no attribute 'compressed' | |
k = scipy.stats.kurtosis(arr) | |
return k | |
def zonal_statistics(row, *args, **kwargs): | |
gdf = row.compute() | |
return gdf.join(pd.DataFrame(zonal_stats(gdf, *args, **kwargs), index=gdf.index)) | |
if __name__ == "__main__": | |
zones = "zones2.shp" | |
values = "values2.tif" | |
stats = ["mean", "std"] | |
add_stats = {"kurtosis": kurtosis} | |
cluster = LocalCluster() | |
client = Client(cluster) | |
gdf = gpd.read_file(zones) | |
ddf = dgpd.from_geopandas(gdf, npartitions=4) | |
# we need to tell dask_geopandas what the output should look like | |
meta = ddf._meta.join(pd.DataFrame(columns=stats+list(add_stats.keys()))) | |
futures = [ | |
client.submit(zonal_statistics, p, raster=values, stats=stats, add_stats=add_stats, all_touched=True) | |
for p in ddf.partitions | |
] | |
completed = [] | |
for future in as_completed(futures): | |
try: | |
data = future.result() | |
completed.append(future) | |
except Exception as exc: | |
pass | |
results = from_delayed(completed, meta=meta, verify_meta=False).compute() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# dask geopandas rasterstats map_partitions demo | |
import dask_geopandas as dgpd | |
import geopandas as gpd | |
import pandas as pd | |
import scipy.stats | |
from rasterstats import zonal_stats | |
def kurtosis(arr): | |
try: | |
k = scipy.stats.kurtosis(arr.compressed()) | |
except AttributeError: # Not a masked array - 'numpy.ndarray' object has no attribute 'compressed' | |
k = scipy.stats.kurtosis(arr) | |
return k | |
def zonal_statistics(row, *args, **kwargs): | |
return row.join(pd.DataFrame(zonal_stats(gdf, *args, **kwargs), index=gdf.index)) | |
if __name__ == "__main__": | |
zones = "zones.shp" | |
values = "values.tif" | |
# pre-defined stats, see the full list at | |
# https://pythonhosted.org/rasterstats/manual.html#zonal-statistics | |
stats = ["mean", "std"] | |
# user-defined stats | |
# https://pythonhosted.org/rasterstats/manual.html#user-defined-statistics | |
add_stats = {"kurtosis": kurtosis} | |
gdf = gpd.read_file(zones) | |
ddf = dgpd.from_geopandas(gdf, npartitions=4) | |
# we need to tell dask_geopandas what the output should look like | |
meta = ddf._meta.join(pd.DataFrame(columns=stats+list(add_stats.keys()))) | |
res = ddf.map_partitions(zonal_statistics, meta=meta, raster=values, stats=stats, add_stats=add_stats, all_touched=True) | |
results = res.compute() | |
print(gdf.head()) | |
print(results.head()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment