Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save dimitri-justeau/20f256086cb153e32213a2c03c1d1327 to your computer and use it in GitHub Desktop.
Save dimitri-justeau/20f256086cb153e32213a2c03c1d1327 to your computer and use it in GitHub Desktop.
Converting multi-class vector dataset to multiple single-class presence-absence raster datasets, with GeoPandas and Rasterio
# coding: utf-8
import os
import math
import geopandas as gpd
import rasterio
import rasterio.features
from affine import Affine
def multiclass_vector_to_singleclass_rasters(input_vector, class_column,
bounding_box, output_base_path,
pixel_size=None, n_cols=None,
n_rows=None, file_prefix='',
file_suffix='',
all_touched=False):
"""
Converts a single shapefile containing vector features of several classes
to multiple single-class presence-absence rasters, one for each class.
The name of the output rasters correspond to the class value
(customizable with a prefix and/or a suffix). The output rasters CRS is
the same as the input vector CRS.
:param input_vector: The input vector file
(must be supported by GeoPandas).
:param class_column: The name of the class columns.
:param bounding_box: A bounding tuple in the form
(x_min, y_min, x_max, y_max).
:param output_base_path: The base path of the output files.
:param pixel_size: The pixel size, a couple (width, height)
or a single value for square pixels. If None, the
pixel size will be computed from n_cols and/or n_rows
parameters.
:param n_cols: The desired number of columns, used only if the pixel
size is None. Can be used with or without a n_rows value.
:param n_rows: The desired number of rows, used only if the pixel
size is None. Can be used with or without a n_cols value.
:param file_prefix: The output files name prefix.
:param file_suffix: The output files name suffix.
:param all_touched: See all_touched argument of rasterio.features.rasterize
https://rasterio.readthedocs.io/en/latest/api/rasterio.features.html#rasterio.features.rasterize
"""
# Load the features
features = gpd.read_file(input_vector)
# Compute resolution
x_min, y_min, x_max, y_max = bounding_box
# - Case 1: pixel size is directly given
if pixel_size is not None:
if isinstance(pixel_size, (list, tuple)) and len(pixel_size) >= 2:
pixel_width = pixel_size[0]
pixel_height = pixel_size[1]
elif isinstance(pixel_size, (int, float)):
pixel_width = pixel_size
pixel_height = pixel_size
else:
raise TypeError()
n_rows = math.ceil((y_max - y_min) / pixel_height)
n_cols = math.ceil((x_max - x_min) / pixel_width)
# - Case 2: pixel size determined from the number of columns and/or rows
elif n_cols is not None:
if n_rows is not None:
# - Case 2.1: from number of columns and rows
pixel_width = (x_max - x_min) / n_cols
pixel_height = (y_max - y_min) / n_rows
else:
# - Case 2.2: from number of columns only (compute n_rows)
pixel_width = (x_max - x_min) / n_cols
pixel_height = pixel_width
n_rows = math.ceil((y_max - y_min) / pixel_width)
elif n_rows is not None:
# - Case 2.3: from number of rows only (compute n_cols)
pixel_height = (y_max - y_min) / n_rows
pixel_width = pixel_height
n_cols = math.ceil((x_max - x_min) / pixel_height)
else:
raise TypeError()
# Define the affine transformation
affine = Affine(pixel_width, 0, x_min, 0, -pixel_height, y_max)
# Generate the rasters
out_meta = {
'width': n_cols,
'height': n_rows,
'transform': affine,
'crs': features.crs,
'driver': 'GTiff',
'count': 1,
'dtype': rasterio.int16
}
classes = features[class_column].unique()
for class_value in classes:
feats = features[features[class_column] == class_value]
shapes = [(geom, 1) for geom in feats.geometry]
file_name = "{}{}{}.tif".format(file_prefix, class_value, file_suffix)
if not os.path.exists(output_base_path):
os.makedirs(output_base_path)
output_path = os.path.join(output_base_path, file_name)
rasterized = rasterio.features.rasterize(
shapes=shapes,
out_shape=(out_meta['height'], out_meta['width']),
dtype=out_meta['dtype'],
transform=out_meta['transform'],
all_touched=all_touched
)
with rasterio.open(output_path, 'w', **out_meta) as out:
out.write_band(1, rasterized)
if __name__ == '__main__':
import argparse
description = \
"""
Converts a single shapefile containing vector features of several
classes to multiple single-class presence-absence rasters, one for
each class. The name of the output rasters correspond to the class
value. The output rasters CRS is the same as the input vector CRS.
"""
parser = argparse.ArgumentParser(description=description)
parser.add_argument(
'input_vector',
type=str,
help="The input vector file (must be supported by GeoPandas)."
)
parser.add_argument(
'class_column',
type=str,
help="The name of the class columns."
)
parser.add_argument(
'x_min',
type=float,
help="x_min value of the bounding box."
)
parser.add_argument(
'y_min',
type=float,
help="y_min value of the bounding box."
)
parser.add_argument(
'x_max',
type=float,
help="x_max value of the bounding box."
)
parser.add_argument(
'y_max',
type=float,
help="x_max value of the bounding box."
)
parser.add_argument(
'output_base_path',
default=',',
help="The base path of the output files."
)
parser.add_argument(
'--pixel_size',
type=float,
default=None,
help="""
The pixel size, a single value for square pixels. If None, the
pixel size will be computed from --pixel_width and --pixel_height,
or n_cols and/or n_rows parameters.
"""
)
parser.add_argument(
'--pixel_width',
type=float,
default=None,
help="""
The pixel width. If If not given nor --pixel_size, pixel size will
be computed from n_cols and/or n_rows parameters.
"""
)
parser.add_argument(
'--pixel_height',
type=float,
default=None,
help="""
The pixel height. If not given nor --pixel_size, pixel size will
be computed from n_cols and/or n_rows parameters.
"""
)
parser.add_argument(
'--n_cols',
type=int,
default=None,
help="""
The desired number of columns, used only if the pixel
size or width/height is not given. Can be used with or without
a --n_rows value.
"""
)
parser.add_argument(
'--n_rows',
type=int,
default=None,
help="""
The desired number of rows, used only if the pixel
size or width/height is not given. Can be used with or without
a n_cols value.
"""
)
parser.add_argument(
'--file_prefix',
type=str,
default='',
help="The output files name prefix."
)
parser.add_argument(
'--file_suffix',
type=str,
default='',
help="The output files name suffix."
)
parser.add_argument(
'--all_touched',
action="store_true",
help="""
See all_touched argument of rasterio.features.rasterize
https://rasterio.readthedocs.io/en/latest/api/rasterio.features.html#rasterio.features.rasterize
"""
)
args = parser.parse_args()
bounding_box = (args.x_min, args.y_min, args.x_max, args.y_max)
pixel_size = None
if args.pixel_size is not None:
pixel_size = args.pixel_size
elif args.pixel_width is not None and args.pixel_height is not None:
pixel_size = (args.pixel_width, args.pixel_height)
multiclass_vector_to_singleclass_rasters(
args.input_vector,
args.class_column,
bounding_box,
args.output_base_path,
pixel_size=pixel_size,
n_cols=args.n_cols,
n_rows=args.n_rows,
file_prefix=args.file_prefix,
file_suffix=args.file_suffix,
all_touched=args.all_touched
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment