Skip to content

Instantly share code, notes, and snippets.

@tastatham
Last active September 23, 2021 16:25
Show Gist options
  • Save tastatham/152b30fe87e8f78a9fc1889189e52aab to your computer and use it in GitHub Desktop.
Save tastatham/152b30fe87e8f78a9fc1889189e52aab to your computer and use it in GitHub Desktop.
spatial_shuffle.py
def spatial_shuffle(
    ddf, by="hilbert", column=None, npartitions=20, p=10, drop=False, **kwargs
):
    """
    Spatially shuffle a dask-geopandas frame so rows with nearby sort keys
    end up in the same partition.

    Parameters
    ----------
    ddf : dask-geopandas GeoDataFrame
        Frame to shuffle. It is not modified; a new frame is returned.
    by : str
        When ``column`` is None: the partitioning method, one of
        ``'hilbert'``, ``'morton'`` or ``'geohash'``. The computed values are
        stored in a helper column named after the method.
        When ``column`` is not None: the name of an existing column of
        ``ddf`` to shuffle on.
    column : object, optional
        Acts as a flag. Any value other than None skips the computation of
        the helper column, and ``by`` is used as the shuffle key as-is.
        NOTE(review): the value itself is never read -- confirm whether it
        was meant to carry the column name.
    npartitions : int
        Number of partitions, forwarded to ``ddf.shuffle``.
    p : int
        Forwarded as the only positional argument to ``hilbert_distance``,
        ``morton_distance`` or ``geohash``.
        NOTE(review): presumably the curve order / precision -- verify
        against the installed dask-geopandas signatures.
    drop : bool, default False
        If true, remove the ``by`` column from the result after shuffling.
    **kwargs
        Extra keyword arguments forwarded to ``ddf.shuffle``.

    Returns
    -------
    The shuffled frame, after ``calculate_spatial_partitions()`` has been
    called on it.

    Raises
    ------
    ValueError
        If ``column`` is None and ``by`` is not a supported method.
    """
    if column is None:
        # Compute the sort key for the requested method; an unknown method is
        # rejected here, before any further work is queued on the frame.
        if by == "hilbert":
            distances = ddf.hilbert_distance(p)
        elif by == "morton":
            distances = ddf.morton_distance(p)
        elif by == "geohash":
            distances = ddf.geohash(p)
        else:
            raise ValueError(
                "Spatial partitioning only supports 'hilbert', 'morton' and 'geohash' methods"
            )
        # assign() returns a new frame, so the caller's ddf does not gain the
        # helper column as a side effect (item assignment would add it in place).
        ddf = ddf.assign(**{by: distances})

    ddf = ddf.shuffle(on=by, npartitions=npartitions, **kwargs)

    if drop:
        ddf = ddf.drop(by, axis=1)
        # Re-declare the active geometry column after the drop.
        # NOTE(review): assumes the geometry column is named "geometry".
        ddf = ddf.set_geometry(col="geometry")

    # Called for its effect on ddf; the return value is not used.
    # NOTE(review): per the original comment this computes the convex hull of
    # each partition -- confirm against dask-geopandas.
    ddf.calculate_spatial_partitions()
    return ddf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment