Skip to content

Instantly share code, notes, and snippets.

@lpinner
Last active June 1, 2022 06:11
Show Gist options
  • Save lpinner/e45d229b57946a53453f3a7071360a5f to your computer and use it in GitHub Desktop.
Save lpinner/e45d229b57946a53453f3a7071360a5f to your computer and use it in GitHub Desktop.
from functools import partial
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from rasterstats import gen_zonal_stats # if using ArcGIS Pro, clone arcgispro-py3 and install rasterstats into the clone.
def histogram(arr, bins=10):
"""Generate a histogram
Args:
arr (array_like):
Input data array. The histogram is computed over the flattened array.
bins (int or sequence of scalars, optional):
If bins is an int, it defines the number of equal-width bins in the given range (10, by default).
If bins is a sequence, it defines a monotonically increasing array of bin edges,
including the rightmost edge, allowing for non-uniform bin widths.
Returns:
hist: array
"""
try:
return np.histogram(arr.compressed(), bins=bins)[0]
except AttributeError: # Not a masked array - 'numpy.ndarray' object has no attribute 'compressed'
return np.histogram(arr, bins=bins)[0]
def zonal_hist(vectors, id_field, raster, bins=10, band=1, affine=None, nodata=None, all_touched=False):
"""Zonal histograms of raster values aggregated to vector geometries
Args:
vectors (str or iterable):
path to an vector data source,
iterable of GeoJSON-like features (e.g. fiona source) or
iterable of python objects that support the __geo_interface__
id_field (str):
name of vector field that identifies zones
raster (str or ndarray):
path to raster or numpy ndarray with an affine transform
bins (sequence of scalars):
defines a monotonically increasing array of bin edges,
including the rightmost edge, allowing for non-uniform bin widths. Defaults to 10.
band (int, optional):
the band number to use (counting from 1). Defaults to 1.
affine (Affine, optional):
required only for ndarrays, otherwise it is read from raster. Defaults to None.
nodata (float, optional):
value overrides any NODATA value specified in the raster metadata.
If None, the raster metadata NODATA value (if any) will be used. Defaults to None.
all_touched (bool, optional):
Whether to include every raster cell touched by a geometry,
or only those having a center point within the polygon. Defaults to False
Returns:
pandas.DataFrame: DataFrame with id_field, bin1, ..., binN fields
"""
if type(bins) is int:
columns = [id_field] + [f"BIN{b}" for b in range(1, bins+1)]
else:
columns = [id_field] + [f"BIN{b}" for b in range(1, len(bins))]
zs = gen_zonal_stats(
vectors, raster, stats="count", add_stats={'histogram': partial(histogram, bins)},
geojson_out=True, band=band, affine=affine, nodata=nodata, all_touched=all_touched)
dfs = []
for feat in zs:
if feat["properties"]["count"] > 0:
data = [feat["properties"][id_field]] + list(feat["properties"]["histogram"])
dfs.append(pd.DataFrame([data], columns=columns))
df = pd.concat(dfs)
return df
def zonal_plot(df, figsize, xticks=None, yticks=None, xlabels=None, ylabels=None, *bar_args, **bar_kwargs):
"""Generate histogram plots and save to PNG files
Args:
df (pandas.DataFrame):
DataFrame with id_field, bin1, ..., binN fields
figsize (tuple):
Tuple of x & y dimensions in cm
xticks (array-like, optional):
The list of xtick locations. Passing an empty list removes all xticks. Defaults to None.
yticks (array-like, optional):
The list of ytick locations. Passing an empty list removes all yticks. Defaults to None.
xlabels (array-like, optional):
The labels to place at the given xticks locations. Defaults to None.
ylabels (array-like, optional):
The labels to place at the given yticks locations. Defaults to None.
*bar_args, **bar_kwargs (optional):
Arguments to pass to matplotlib.axes.Axes.bar
https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.bar.html
Yields:
tuple(id, fig, ax): id_field value, plyplot figure and axes objects
"""
inches = 2.54 # inch to cm conversion
bins = df.columns[1:]
fig, ax = plt.subplots(figsize=(figsize[0]/inches, figsize[1]/inches))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ymax = max(df[bins].max())
xmax = len(bins)
x = list(range(xmax))
for row in df.itertuples(index=False):
y = row[1:]
ax.bar(x, y, width=-1, align='edge', *bar_args, **bar_kwargs)
plt.xlim([0, xmax])
plt.ylim([0, ymax])
if xticks is None:
ax.axes.xaxis.set_visible(False)
else:
plt.xticks(xticks, xlabels)
if yticks is None:
ax.axes.yaxis.set_visible(False)
else:
plt.yticks(yticks, ylabels)
yield row[0], fig, ax
plt.cla()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment