zonal_stats.py (@brendancol, forked from perrygeo/zonal_stats.py, created July 24, 2013)
"""
Zonal Statistics
Vector-Raster Analysis
Copyright 2013 Matthew Perry
Usage:
zonal_stats.py VECTOR RASTER
zonal_stats.py -h | --help
zonal_stats.py --version
Options:
-h --help Show this screen.
--version Show version.
"""
from osgeo import gdal, ogr
from osgeo.gdalconst import GA_ReadOnly
import numpy as np
import sys

gdal.PushErrorHandler('CPLQuietErrorHandler')

def bbox_to_pixel_offsets(gt, bbox):
    originX = gt[0]
    originY = gt[3]
    pixel_width = gt[1]
    pixel_height = gt[5]
    x1 = int((bbox[0] - originX) / pixel_width)
    x2 = int((bbox[1] - originX) / pixel_width) + 1
    y1 = int((bbox[3] - originY) / pixel_height)
    y2 = int((bbox[2] - originY) / pixel_height) + 1
    xsize = x2 - x1
    ysize = y2 - y1
    return (x1, y1, xsize, ysize)
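
# Worked example (assumed values, not from the source): for a 30 m raster with
# geotransform gt = (1000.0, 30.0, 0.0, 2000.0, 0.0, -30.0), i.e. top-left
# corner at (1000, 2000), an OGR envelope of (1090.0, 1210.0, 1790.0, 1910.0)
# given as (xmin, xmax, ymin, ymax) yields bbox_to_pixel_offsets(gt, bbox) ==
# (3, 3, 5, 5): start 3 columns in from the left edge, 3 rows down from the
# top, and read a 5 x 5 pixel window.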

def zonal_stats(vector_path, raster_path, nodata_value=None, global_src_extent=False):
    rds = gdal.Open(raster_path, GA_ReadOnly)
    assert rds is not None
    rb = rds.GetRasterBand(1)
    rgt = rds.GetGeoTransform()

    if nodata_value is not None:  # explicit None check so a nodata value of 0 is honored
        nodata_value = float(nodata_value)
        rb.SetNoDataValue(nodata_value)

    vds = ogr.Open(vector_path, GA_ReadOnly)  # TODO maybe open update if we want to write stats
    assert vds is not None
    vlyr = vds.GetLayer(0)
    # create an in-memory numpy array of the source raster data
    # covering the whole extent of the vector layer
    if global_src_extent:
        # use global source extent
        # useful only when disk IO or raster scanning inefficiencies are your limiting factor
        # advantage: reads raster data in one pass
        # disadvantage: large vector extents may have big memory requirements
        src_offset = bbox_to_pixel_offsets(rgt, vlyr.GetExtent())
        src_array = rb.ReadAsArray(*src_offset)

        # calculate new geotransform of the layer subset
        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )
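
        # Continuing the worked example above (assumed values): with
        # rgt = (1000.0, 30.0, 0.0, 2000.0, 0.0, -30.0) and
        # src_offset = (3, 3, 5, 5), the subset origin shifts 3 pixels right
        # and 3 rows down, so new_gt = (1090.0, 30.0, 0.0, 1910.0, 0.0, -30.0);
        # the pixel size is unchanged.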
    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    # Loop through vectors
    stats = []
    feat = vlyr.GetNextFeature()
    while feat is not None:
        if not global_src_extent:
            # use local source extent
            # fastest option when you have fast disks and a well indexed raster (ie tiled Geotiff)
            # advantage: each feature uses the smallest raster chunk
            # disadvantage: lots of reads on the source raster
            src_offset = bbox_to_pixel_offsets(rgt, feat.geometry().GetEnvelope())
            src_array = rb.ReadAsArray(*src_offset)

            # calculate new geotransform of the feature subset
            new_gt = (
                (rgt[0] + (src_offset[0] * rgt[1])),
                rgt[1],
                0.0,
                (rgt[3] + (src_offset[1] * rgt[5])),
                0.0,
                rgt[5]
            )

        # Create a temporary vector layer in memory
        mem_ds = mem_drv.CreateDataSource('out')
        mem_layer = mem_ds.CreateLayer('poly', None, ogr.wkbPolygon)
        mem_layer.CreateFeature(feat.Clone())

        # Rasterize it
        rvds = driver.Create('', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
        rvds.SetGeoTransform(new_gt)
        gdal.RasterizeLayer(rvds, [1], mem_layer, burn_values=[1])
        rv_array = rvds.ReadAsArray()

        # Mask the source data array with our current feature
        # we take the logical_not to flip 0<->1 to get the correct mask effect
        # we also mask out nodata values explicitly
        masked = np.ma.MaskedArray(
            src_array,
            mask=np.logical_or(
                src_array == nodata_value,
                np.logical_not(rv_array)
            )
        )
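
        # Illustration (assumed values): if rv_array is [[0, 1], [1, 1]] and
        # src_array is [[7, 2], [4, nodata]], the combined mask is
        # [[True, False], [False, True]], so only the 2 and the 4 contribute
        # to the statistics below.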

        feature_stats = {
            'min': float(masked.min()),
            'mean': float(masked.mean()),
            'max': float(masked.max()),
            'std': float(masked.std()),
            'sum': float(masked.sum()),
            'count': int(masked.count()),
            'fid': int(feat.GetFID())}
        stats.append(feature_stats)

        rvds = None
        mem_ds = None
        feat = vlyr.GetNextFeature()

    vds = None
    rds = None
    return stats
if __name__ == "__main__":
opts = {'VECTOR': sys.argv[1], 'RASTER': sys.argv[2]}
stats = zonal_stats(opts['VECTOR'], opts['RASTER'])
try:
from pandas import DataFrame
print DataFrame(stats)
except ImportError:
import json
print json.dumps(stats, indent=2)
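
The function can also be called from Python rather than the CLI. A minimal sketch, assuming zonal_stats.py is importable and reusing the file names from the benchmark below:

from zonal_stats import zonal_stats

# compute per-feature stats and print one line per polygon
stats = zonal_stats('test.shp', 'terrain/slope.tif')
for s in stats:
    print(s['fid'], s['count'], s['mean'])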
#### Example run

$ time python zonal_stats.py test.shp terrain/slope.tif
count fid max mean min std sum
0 203 0 96 65.876847 3 17.968489 13373
1 130 1 90 60.100000 3 16.728994 7813
2 1341 2 102 53.211037 2 17.901655 71356
3 130 3 90 60.100000 3 16.728994 7813
4 132 4 64 15.962121 1 15.360519 2107
5 132 5 53 31.515152 17 7.970100 4160
6 131 6 42 9.893130 0 8.168317 1296
7 132 7 64 28.712121 2 14.853594 3790
8 133 8 54 35.548872 11 8.878856 4728
9 131 9 82 52.297710 4 17.349877 6851
10 131 10 11 3.030534 0 1.781752 397
11 134 11 57 10.156716 1 11.960042 1361
12 133 12 45 19.000000 0 13.727750 2527
13 132 13 64 26.507576 1 18.848075 3499
14 132 14 94 52.787879 1 22.297585 6968
15 131 15 84 19.450382 1 15.992944 2548
16 132 16 52 11.583333 0 11.538501 1529
17 132 17 108 53.515152 6 18.198603 7064
18 341 18 76 39.117302 9 11.540482 13339
19 337 19 57 19.988131 4 9.593512 6736
20 336 20 78 48.636905 11 13.357014 16342
21 338 21 3 0.855030 0 0.527067 289
22 337 22 34 5.347181 0 7.069888 1802
23 341 23 0 0.000000 0 0.000000 0
24 341 24 42 16.612903 0 9.041271 5665
25 337 25 128 78.848665 5 18.689028 26572
26 341 26 29 7.973607 1 5.341357 2719
27 339 27 78 35.616519 5 14.455317 12074
28 341 28 65 20.199413 0 16.636394 6888
29 340 29 84 35.855882 1 17.022989 12191
30 338 30 96 61.440828 2 16.703587 20767
31 340 31 101 57.832353 8 18.161971 19663
real 0m1.311s
user 0m0.372s
sys 0m0.752s
#### Starspan equivalent
$ time starspan --vector test.shp --out-prefix testout --out-type table \
--summary-suffix _stats.csv --raster terrain/slope.tif \
--stats avg mode median min max sum stdev nulls && \
cat testout_stats.csv
1: Extracting from /usr/local/apps/land_owner_tools/lot/fixtures/downloads/terrain/slope.tif
Summary:
Intersecting features: 32
Polygons: 32
Processed pixels: 8379
real 0m1.440s
user 0m0.944s
sys 0m0.296s