Skip to content

Instantly share code, notes, and snippets.

@nikmolnar
Last active August 22, 2016 19:29
Show Gist options
  • Save nikmolnar/aeff678ff19cec94f162b4729cfad6bf to your computer and use it in GitHub Desktop.
Save nikmolnar/aeff678ff19cec94f162b4729cfad6bf to your computer and use it in GitHub Desktop.
import errno
import math
import os
import time
from csv import DictWriter
from netCDF4 import Dataset
import fiona
import numpy
from clover.geometry.bbox import BBox
from clover.netcdf.variable import SpatialCoordinateVariables
from pyproj import Proj
from rasterio.features import rasterize
from rasterio.warp import transform_geom
from shapely.geometry import shape
DEM = 'aligned_elevation.nc'
DATA_DIR = 'west1/1961_1990Y/'
ZONES_DIR = 'seed_zones/'
OUTPUT_DIR = 'output/'
CRS = {'init': 'EPSG:4326'}
PROJECTION = Proj(**CRS)
VARIABLES = (
'AHM', 'CMD', 'DD5', 'DD_0', 'EMT', 'Eref', 'EXT', 'FFP', 'MAP', 'MAT', 'MCMT', 'MSP', 'MWMT', 'PAS', 'SHM', 'TD'
)
SEED_ZONES = (
{
'name': 'oregon_douglas_fir',
'data': os.path.join(ZONES_DIR, 'Oregon_Seed_Zones', 'Douglas_Fir.shp'),
'zone_id_field': 'SZPSME'
},
{
'name': 'washington_douglas_fir',
'data': os.path.join(ZONES_DIR, 'WA_NEW_ZONES', 'PSME.shp'),
'zone_id_field': 'ZONE_NO'
},
{
'name': 'oregon_lodgepole_pine',
'data': os.path.join(ZONES_DIR, 'Oregon_Seed_Zones', 'Lodgepole_Pine.shp'),
'zone_id_field': 'szpico'
},
{
'name': 'washington_lodgepole_pine',
'data': os.path.join(ZONES_DIR, 'WA_NEW_ZONES', 'PICO.shp'),
'zone_id_field': 'ZONE_NO'
},
{
'name': 'oregon_ponderosa_pine',
'data': os.path.join(ZONES_DIR, 'Oregon_Seed_Zones', 'Ponderosa_Pine.shp'), # File is missing zones
'zone_id_field': 'SZPIPO'
},
{
'name': 'washington_ponderosa_pine',
'data': os.path.join(ZONES_DIR, 'WA_NEW_ZONES', 'PIPO.shp'),
'zone_id_field': 'ZONE_NO'
},
{
'name': 'oregon_western_redcedar',
'data': os.path.join(ZONES_DIR, 'Oregon_Seed_Zones', 'Western_Red_Cedar.shp'),
'zone_id_field': 'SZTHPL'
},
{
'name': 'washington_western_redcedar',
'data': os.path.join(ZONES_DIR, 'WA_NEW_ZONES', 'THPL.shp'),
'zone_id_field': 'ZONE_NO'
},
{
'name': 'oregon_western_white_pine',
'data': os.path.join(ZONES_DIR, 'Oregon_Seed_Zones', 'Western_White_Pine.shp'), # File is missing zone 1
'zone_id_field': 'SZPIMO'
},
{
'name': 'washington_western_white_pine',
'data': os.path.join(ZONES_DIR, 'WA_NEW_ZONES', 'PIMO.shp'),
'zone_id_field': 'ZONE_NO'
},
{
'name': 'oregon_washington_historic',
'data': os.path.join(ZONES_DIR, 'historic_seed_zones', 'historic_seed_zones.shp'),
'zone_id_field': 'SUBJ_FSZ'
}
)
def get_bands_fn(name):
""" Return a function to generate elevation bands for a zone """
def _every(low, high, increment, start=0):
""" Returns a generator for zones within low-high based on the increment """
if high < low:
return (x for x in [])
low -= start
high -= start
low -= low % increment
high += increment - high % increment
return ((x + start, x + increment + start) for x in range(low, high, increment))
def historical(zone_id, low, high):
return _every(low, high, 500)
def wa_psme(zone_id, low, high):
if zone_id < 13:
return _every(low, high, 1000)
elif zone_id < 17:
return [(0, 1400)] + list(_every(1400, high, 700, 1400))
else:
raise ValueError('Unrecognized zone: {}'.format(zone_id))
def or_psme(zone_id, low, high):
if zone_id < 4:
return [(0, 2000), (2000, 2750)] + list(_every(2750, high, 500, 2750))
elif zone_id == 5:
return _every(low, high, 500)
elif zone_id == 4 or zone_id < 17:
return [(0, 1000)] + list(_every(1000, high, 500, 1000))
else:
raise ValueError('Unrecognized zone: {}'.format(zone_id))
def wa_pico(zone_id, low, high):
if zone_id < 14:
return _every(low, high, 1000)
elif zone_id < 18:
return _every(low, high, 700)
else:
raise ValueError('Unrecognized zone: {}'.format(zone_id))
def or_pico(zone_id, low, high):
return _every(low, high, 1000)
def or_pipo(zone_id, low, high):
if zone_id == 10:
return list(_every(low, min(4999, high), 1000)) + list(_every(5000, high, 700, 5000))
elif zone_id < 16:
return _every(low, high, 1000)
else:
raise ValueError('Unrecognized zone: {}'.format(zone_id))
def wa_pipo(zone_id, low, high):
if zone_id < 4:
return [(0, high + 1)]
elif zone_id < 12:
return _every(low, high, 1000)
else:
raise ValueError('Unrecognized zone: {}'.format(zone_id))
def wa_thpl(zone_id, low, high):
if zone_id < 5:
return _every(low, high, 2000)
elif zone_id < 8:
return _every(low, high, 1500)
else:
raise ValueError('Unrecognized zone: {}'.format(zone_id))
def or_thpl(zone_id, low, high):
if zone_id < 3:
return [(0, 999999)]
elif zone_id < 5:
return [(0, 3500), (3500, high + 1)]
else:
raise ValueError('Unrecognized zone: {}'.format(zone_id))
def wa_pimo(zone_id, low, high):
return [(low, high + 1)]
def or_pimo(zone_id, low, high):
return [(low, high + 1)]
return {
'oregon_washington_historic': historical,
'oregon_douglas_fir': or_psme,
'washington_douglas_fir': wa_psme,
'oregon_lodgepole_pine': or_pico,
'washington_lodgepole_pine': wa_pico,
'oregon_ponderosa_pine': or_pipo,
'washington_ponderosa_pine': wa_pipo,
'oregon_western_redcedar': or_thpl,
'washington_western_redcedar': wa_thpl,
'oregon_western_white_pine': or_pimo,
'washington_western_white_pine': wa_pimo
}[name]
def get_subsets(elevation, data, coords, bounds):
""" Returns subsets of elevation, data, and coords, clipped to the given bounds """
bbox = BBox(bounds)
x_slice = slice(*coords.x.indices_for_range(bbox.xmin, bbox.xmax))
y_slice = slice(*coords.y.indices_for_range(bbox.ymin, bbox.ymax))
return elevation[y_slice, x_slice], data[y_slice, x_slice], coords.slice_by_bbox(BBox(bounds))
def write_row(writer, variable, zone_file, zone_id, masked_data, low, high):
minValue = numpy.nanmin(masked_data)
maxValue = numpy.nanmax(masked_data)
transfer = (maxValue - minValue) / 2.0
center = maxValue - transfer
nan_data = masked_data.astype(numpy.float32)
nan_data[masked_data.mask] = numpy.nan
p5 = numpy.nanpercentile(nan_data, 5)
p95 = numpy.nanpercentile(nan_data, 95)
p_transfer = (p95 - p5) / 2.0
p_center = p95 - p_transfer
writer.writerow({
'samples': os.path.join(
'{}_samples'.format(variable), '{}_zone_{}_{}_{}.txt'.format(zone_file, zone_id, low, high)
),
'zone_file': zone_file,
'zone': zone_id,
'band_low': low,
'band_high': high,
'median': float(numpy.ma.median(masked_data)),
'mean': numpy.nanmean(masked_data),
'min': minValue,
'max': maxValue,
'transfer': transfer,
'center': center,
'p5': p5,
'p95': p95,
'p_transfer': p_transfer,
'p_center': p_center
})
def write_sample(variable, zone_file, zone_id, masked_data, low, high):
sample = masked_data.compressed() # Discard masked values
numpy.random.shuffle(sample)
sample = sample[:1000]
filename = '{}_zone_{}_{}_{}.txt'.format(zone_file, zone_id, low, high)
with open(os.path.join(OUTPUT_DIR, '{}_samples'.format(variable), filename), 'w') as f:
f.write(','.join(str(x) for x in sample))
f.write(os.linesep)
def main():
seed = input('Enter random seed (leave blank to auto-generate): ')
if not seed:
seed = int(time.time())
print('Using random seed: {}'.format(int(seed)))
numpy.random.seed(seed)
with Dataset(DEM) as ds:
coords = SpatialCoordinateVariables.from_dataset(ds, x_name='lon', y_name='lat', projection=PROJECTION)
elevation = ds.variables['Band1'][:]
for variable in VARIABLES:
print('Processing {}...'.format(variable))
try:
os.mkdir(os.path.join(OUTPUT_DIR, '{}_samples'.format(variable)))
except OSError as ex:
if ex.errno != errno.EEXIST:
raise
with open(os.path.join(OUTPUT_DIR, '{}.csv'.format(variable)), 'w') as f_out:
writer = DictWriter(
f_out, fieldnames=[
'samples', 'zone_file', 'zone', 'band_low', 'band_high', 'median', 'mean', 'min', 'max', 'transfer',
'center', 'p5', 'p95', 'p_transfer', 'p_center'
])
writer.writeheader()
with Dataset(os.path.join(DATA_DIR, '{}.nc'.format(variable))) as ds:
data = ds.variables[variable][:]
for zone_shp in SEED_ZONES:
with fiona.open(zone_shp['data'], 'r') as shp:
for feature in shp:
zone_id = int(feature['properties'][zone_shp['zone_id_field']] or 0)
if zone_id == 0:
continue
geometry = transform_geom(shp.crs, CRS, feature['geometry'])
clipped_elevation, clipped_data, clipped_coords = get_subsets(
elevation, data, coords, shape(geometry).bounds
)
zone_mask = rasterize(
((geometry, 1),), out_shape=clipped_elevation.shape, transform=clipped_coords.affine,
fill=0, dtype=numpy.dtype('uint8')
)
masked_dem = numpy.ma.masked_where(zone_mask == 0, clipped_elevation)
min_elevation = math.floor(numpy.nanmin(masked_dem) / 0.3048)
max_elevation = math.ceil(numpy.nanmax(masked_dem) / 0.3048)
zone_name = zone_shp['name']
bands = list(get_bands_fn(zone_name)(zone_id, min_elevation, max_elevation))
if bands is None:
print('WARNING: No elevation bands found for {}, zone {}'.format(
zone_name, zone_id
))
continue
for band in bands:
low, high = band
# Elevation bands are represented in feet
masked_data = numpy.ma.masked_where(
(zone_mask == 0) | (clipped_elevation < low * 0.3048) |
(clipped_elevation >= high * 0.3048),
clipped_data
)
write_row(writer, variable, zone_name, zone_id, masked_data, low, high)
write_sample(variable, zone_name, zone_id, masked_data, low, high)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment