Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Functions to batch extract similar raster images
import os
import zipfile
import fiona
import rasterio
import datetime
import numpy as np
import pandas as pd
import geopandas as gpd
import ohio.ext.pandas
from pathlib import Path
from tempfile import NamedTemporaryFile
from rasterio.io import MemoryFile
from shapely.geometry import box, asShape
from rasterstats import zonal_stats
from rasterio.io import MemoryFile, ZipMemoryFile
from shapely import speedups
if speedups.available:
speedups.enable()
def draw_envelope(zipfile_path,
tol=0):
"""
Create inverse mak from neighbor raster layers.
When extracting values from rasters, most of the functions rely on
rasterization (i.e. '''rasterstats.rasterstats''') to index the values
corresponding to each polygon or geometry passed. This process can be
intensive if the we are extracting from several raster layers, since
the rasterization is done for each of the geometries (polygons, mostly) in
the list and for each of the raster layers, even if they have the same
extent.
One possible way to optimize this behavior is to pass a pre-calculated mask
that index each of the polygons to a list of rasters. This can be done by
assuming (and checking) that all rasters have the same extent, hence, the
indices corresponging to a polygon will be equal in all polygons.
Nonetheless, if rasters have different rasters, things are not
straightforward. This function will solve that problem for a particular
case where rasters are within a common neighborhood.
This function will create an "envelope" layer that will cover all the
raster elements passed, and return a raster layer with no values, but with
a bounding box (using the shapely.geometry.box) with the needed extent to
cover all the layers. A tolerance argument is given to define how much are
should be added to the max/min values of the list of bounding boxes of the
given rasters.
:param lst list_geoms: a list of `rasterio` objects with neighboring
boundings. It can be pathlib.Paths.
:param float tol: a float giving how much tolerance should be given
to the returned bounding box. This value is 0.5 by default (assuming a
degree projection this would be an additional 0.5 degrees by each element
of the boundary).
:returns: A masked raster layer.
"""
zip_files = []
with zipfile.ZipFile(zipfile_path, 'r') as zf:
for file_name in zf.namelist():
zip_files.append(file_name)
bounds_list = []
for raster_file in zip_files:
fp = open(zipfile_path, 'rb')
with ZipMemoryFile(fp) as zipmem:
with zipmem.open(raster_file) as src:
meta_info = src.meta
affine = src.transform
bounds_list.append(src.bounds)
env_bounds = {
'left': min([a.left for a in bounds_list]) - tol,
'right': max([a.right for a in bounds_list]) + tol,
'bottom': min([a.bottom for a in bounds_list]) - tol,
'top': max([a.top for a in bounds_list]) + tol
}
return env_bounds
def transform_raster_bounds(envelope_bounds,
raster_path_source = None,
meta = None):
"""
Utility function to change raster layer bounds and keeping the same
projection and resolution
This function uses the rasterio.wrap module to create a new raster data
layer with new bounds. Usually, rasterio.window module can be used when
trying to subset a dataset. But, window is not useful when we want to
increase the raster bounds. transform_raster_bounds fills that gap. The
resulting raster will be saved in memory using the rasterio.io module.
:param str raster_path_source: path to source raster layer.
:param dict envelope_bounds: dictionary to define new bounds. Following
rasterio standards, the dict must have left, right, bottom, and top as
keys.
:returns: Bytes object.
"""
if meta is None:
with rasterio.open(raster_path_source, 'r') as src:
print(src.bounds)
print(f'{src.height},{src.width}')
wind = rasterio.windows.from_bounds(left=envelope_bounds['left'],
bottom=envelope_bounds['bottom'],
right=envelope_bounds['right'],
top=envelope_bounds['top'],
transform=src.transform)
transform, width, height = rasterio.warp.calculate_default_transform(
src_crs=src.crs,
dst_crs=src.crs,
width=wind.width,
height=wind.height,
left=envelope_bounds['left'],
bottom=envelope_bounds['bottom'],
right=envelope_bounds['right'],
top=envelope_bounds['top'],
resolution=src.res
)
meta = src.meta.copy()
meta.update({
'transform': transform,
'width': width,
'height': height
})
with MemoryFile() as temp:
with temp.open(**meta) as dst:
rasterio.warp.reproject(source=rasterio.band(src, 1),
destination=rasterio.band(dst, 1),
src_transform=src.transform,
src_crs=src.crs,
dst_transform=transform,
dst_crs=src.crs,
resampling=rasterio.warp.Resampling.max)
temp_bytes = temp.read()
else:
with MemoryFile() as memfile:
with memfile.open(**meta) as dataset:
dataset.write(np.zeros(shape=(meta['height'], meta['width']),
dtype='int16'), 1)
wind = rasterio.windows.from_bounds(left=envelope_bounds['left'],
bottom=envelope_bounds['bottom'],
right=envelope_bounds['right'],
top=envelope_bounds['top'],
transform=dataset.transform)
transform, width, height = rasterio.warp.calculate_default_transform(
src_crs=dataset.crs,
dst_crs=dataset.crs,
width=wind.width,
height=wind.height,
left=envelope_bounds['left'],
bottom=envelope_bounds['bottom'],
right=envelope_bounds['right'],
top=envelope_bounds['top'],
resolution=dataset.res
)
meta = dataset.meta.copy()
meta.update({
'transform': transform,
'width': width,
'height': height
})
with MemoryFile() as temp:
with temp.open( **meta) as dst:
rasterio.warp.reproject(source=rasterio.band(dataset, 1),
destination=rasterio.band(dst, 1),
src_transform=dataset.transform,
src_crs=dataset.crs,
dst_transform=transform,
dst_crs=dataset.crs,
resampling=rasterio.warp.Resampling.max)
temp_bytes = temp.read()
return temp_bytes
def get_crosswalk(zipfile_path,
query,
engine,
geom_id,
meta_dict = None,
partial_coverage = False,
**args):
"""
Build a dictionary of polygons for each raster layer.
In order to extract only from overlapping geometries, we need to identify
which geometries have a complete overlap within the raster boundaries, and
optionally, also the geomtries that partially overlap.
:param str shapefile_path: path to the shapefile from where we want to
extract data.
:param list list_geoms: list of raster objects or list of paths to raster
geoms.
:param bool weight: Return partially overlapped geometries.
:param str **args: other arguments to pass to ``transform_raster_bounds``
:return: dictionary with file_id as key and overlap geometries with weights
if weight is True.
"""
bounds_geoms = draw_envelope(zipfile_path=zipfile_path,
tol=args['tolerance'])
temp_bytes = transform_raster_bounds(envelope_bounds=bounds_geoms,
meta=meta_dict)
src_shapes = gpd.read_postgis(query,
con=engine,
crs=4326)
bounds_shapes = box(minx=bounds_geoms['left'],
miny=bounds_geoms['bottom'],
maxx=bounds_geoms['right'],
maxy=bounds_geoms['top'])
overlap_shapes = []
for poly in src_shapes.iterfeatures():
shape_poly = asShape(poly['geometry'])
if bounds_shapes.contains(shape_poly):
overlap_shapes.append(poly)
if partial_coverage:
crosswalk_dict = {}
with MemoryFile(temp_bytes) as temp_raster:
with temp_raster.open() as src_raster:
for shape in overlap_shapes:
geom_rasterize = rasterio.features.rasterize([(shape['geometry'], 1)],
out_shape=src_raster.shape,
transform=src_raster.transform,
all_touched=True,
fill=-9999,
dtype='int16')
crosswalk_dict[shape['properties'][geom_id]] = {
'indices': np.where(geom_rasterize == 1),
'coverage': 100
}
else:
crosswalk_dict = {}
with MemoryFile(temp_bytes) as temp_raster:
with temp_raster.open() as src_raster:
for shape in overlap_shapes:
geom_rasterize = rasterio.features.rasterize([(shape['geometry'], 1)],
out_shape=src_raster.shape,
transform=src_raster.transform,
all_touched=False,
dtype='uint8')
crosswalk_dict[shape['properties'][geom_id]] = np.where(geom_rasterize == 1)
return crosswalk_dict
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.