Skip to content

Instantly share code, notes, and snippets.

@grovduck
Last active November 15, 2017 21:32
Show Gist options
  • Save grovduck/88e8d28411115e8bf8a3f096b312c45a to your computer and use it in GitHub Desktop.
Save grovduck/88e8d28411115e8bf8a3f096b312c45a to your computer and use it in GitHub Desktop.
[Rasterio chunks based on RAM] Script to return window address based on user-defined allowable RAM usage. This allows the user to do fewer reads on rasterio data. #rasterio #numpy
"""
This script returns window addresses based on user-defined maximum RAM
usage. It is similar to rasterio.block_windows but allows the user to
potentially do fewer reads on rasterio data.
"""
import rasterio
import numpy as np
def chunk_windows(r, indexes=None, max_ram=250000000):
"""
Determine windows for reading a rasterio dataset based on a
user-specified RAM limit
Parameters
----------
r : rasterio.DatasetReader
The raster used to determine chunk size
indexes : int or sequence
The subset of indexes (bands) to use to determine chunk size.
Defaults to None, which uses all bands in the raster
max_ram : int
The maximum RAM to allocate per chunk. Defaults to ~250MB.
Returns
-------
A generator with the chunk index and chunk window address as a tuple.
This is the same structure that is returned by rasterio.block_windows
"""
# Get the indexes (bands) to use - these are 1-indexed (instead of 0)
if indexes is None:
indexes = r.indexes
elif isinstance(indexes, int):
indexes = [indexes]
if not indexes:
raise ValueError('No indexes to read')
# Calculate the size of a pixel across all bands
pixel_size = 0
for bidx in indexes:
if bidx not in r.indexes:
raise IndexError('band index out of range')
idx = r.indexes.index(bidx)
pixel_size += np.dtype(r.dtypes[idx]).itemsize
# Calculate how many pixels to collect per chunk
chunk_size, _ = divmod(max_ram, pixel_size)
# Short circuit the case where the entire raster fits in one chunk
r_h, r_w = r.height, r.width
if chunk_size >= r_h * r_w:
yield (0, 0), ((0, r_h), (0, r_w))
# Otherwise calculate how many "block rows" (block height * raster width)
# will fit in one chunk (chunk_height)
else:
b_h, b_w = r.block_shapes[0]
d, _ = divmod(chunk_size, r_w * b_h)
chunk_height = d * b_h
# Calculate the number of chunks and return the chunk windows
d, m = divmod(r_h, chunk_height)
n_chunks = d + int(m>0)
for i in range(n_chunks):
row = i * chunk_height
height = min(chunk_height, r_h - row)
yield (i, 0), ((row, row+chunk_height), (0, r_w))
if __name__ == '__main__':
import sys
# Raster to read
fn = sys.argv[1]
# Maximum RAM to use per chunk
max_ram = int(sys.argv[2])
# Get chunks (full rows) and print the window and maximum value
with rasterio.open(fn) as src:
for _, window in chunk_windows(src, max_ram=max_ram):
print 'Window =', window,
arr = src.read(1, window=window)
print 'Max value =', np.max(arr)
del arr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment