Skip to content

Instantly share code, notes, and snippets.

@sixy6e
Last active December 6, 2015 11:18
Show Gist options
  • Save sixy6e/a4a1a14ffd43012b0242 to your computer and use it in GitHub Desktop.
Save sixy6e/a4a1a14ffd43012b0242 to your computer and use it in GitHub Desktop.
KEA-format
#!/usr/bin/env python
from affine import Affine
import h5py
from dtypes import NUMPY2KEADTYPE
from dtypes import KEA2NUMPYDTYPE
from dtypes import GDAL2KEADTYPE
from dtypes import KEA2GDALDTYPE
from rasterio.crs import from_string
from rasterio.crs import to_string
import osr
import collections
import numpy
IMAGE_VERSION = "1.2"
VERSION = "1.1"
FILETYPE = "KEA"
GENERATOR = "h5py"
def open(path, mode='r', width=None, height=None, count=None, transform=None,
crs=None, no_data=None, dtype=None, chunks=(256, 256),
blocksize=256, compression=1, band_names=None):
# should we pass to different read write classes based on the mode?
if mode == 'r':
fid = h5py.File(path, mode)
ds = KeaH5RDOnly(fid)
elif mode == 'r+':
fid = h5py.File(path, mode)
ds = KeaH5RW(fid)
elif mode =='w':
# Check we have all the necessary creation options
if (width is None) or (height is None):
msg = "Error. Both width and height must be specified."
raise ValueError(msg)
if dtype is None:
msg = "Error. The dtype must be specifified."
raise ValueError(msg)
if count is None:
msg = "Error. The count must be specified."
raise ValueError(msg)
# If we have no transform, default to image co-ordinates
if (transform is None) or (crs is None):
ul = (0, 0)
rot = (0, 0)
res = (1, -1)
transform = Affine.from_gdal(*[0.0, 1.0, 0.0, 0.0, 0.0, -1.0])
crs = ""
if (chunks[0] > height) or (chunks[1] > width):
msg = "The chunks must not exceed the width or height."
raise ValueError(msg)
# we'll use rasterio's proj4 dict mapping
if not isinstance(crs, dict):
msg = "Error. The crs is not a valid proj4 dict style mapping."
raise ValueError(msg)
# we'll follow rasterio in using an affine
if not isinstance(transform, Affine):
msg = "Error. The transform is not an Affine instance."
transform = Affine.from_gdal(*transform)
fid = h5py.File(path, mode)
create_kea_image(fid, width, height, count, transform, crs, no_data,
dtype, chunks, blocksize, compression, band_names)
ds = KeaH5RW(fid)
return ds
def create_kea_image(fid, width, height, count, transform, crs, no_data,
dtype, chunksize, blocksize, compression, band_names):
"""
Initialises the KEA format layout
"""
# group names for each band
band_group_names = ['BAND{}'.format(i+1) for i in range(count)]
# resolutio, ul corner tie point co-ordinate, rotation
res = (transform[0], transform[4])
ul = (transform[2], transform[5])
rot = (transform[1], transform[3])
# gdal or numpy number dtype value
kea_dtype = NUMPY2KEADTYPE[dtype]
# convert the proj4 dict to wkt
sr = osr.SpatialReference()
sr.ImportFromProj4(to_string(crs))
crs_wkt = sr.ExportToWkt()
# create band level groups
band_groups = {}
for gname in band_group_names:
fid.create_group(gname)
fid[gname].create_group('METADATA')
# TODO need example data in order to flesh the overviews section
fid[gname].create_group('OVERVIEWS')
# dataset for our data and associated attributes
fid[gname].create_dataset('DATA', shape=shape, dtype=dtype,
compression=compression, chunks=chunks,
fillvalue=no_data)
# CLASS 'IMAGE', is a HDF recognised attribute
fid[gname]['DATA'].attrs['CLASS'] = 'IMAGE'
fid[gname]['DATA'].attrs['IMAGE_VERSION'] = IMAGE_VERSION
fid[gname]['DATA'].attrs['BLOCK_SIZE'] = blocksize
# KEA has defined their own numerical datatype mapping
fid[gname].create_dataset('DATATYPE', shape=(1,),
data=kea_dtype, dtype='uint16')
# descriptors of the dataset
# TODO what info can be populated here???
fid[gname].create_dataset('DESCRIPTION', shape=(1,),
data='')
# we'll use a default, but all the user to overide later
fid[gname].create_dataset('LAYER_TYPE', shape=(1,), data=0)
fid[gname].create_dataset('LAYER_USAGE', shape=(1,), data=0)
# TODO unclear on this section
fid[gname].create_group('ATT/DATA')
# TODO need an example in order to flesh the neighbours section
fid[gname].create_group('ATT/NEIGHBOURS')
# TODO unclear on header chunksize and size
fid[gname].create_dataset('ATT/HEADER/CHUNKSIZE', data=0,
dtype='uint64')
fid[gname].create_dataset('ATT/HEADER/SIZE', data=[0,0,0,0,0],
dtype='uint64')
# do we have no a data value
if no_data is not None:
fid[gname].create_dataset('NO_DATA_VAL', shape=(1,),
data=no_data)
# Some groups like GCPS will be empty depending on the type of image
# being written to disk. As will some datasets.
# TODO need an example in order to flesh out the GCP's section
fid.create_group('GCPS')
# TODO what else can we put in the metadata section
met = fid.create_group('METADATA')
# if we don't have an band names defined, create default values
if band_names is None:
band_names = ['Band {}'.format(bn + 1) for bn in range(count)]
elif len(band_names) != count:
# overwrite the user (probably should notify the user)
bname_format = 'Band {}'
band_names = ['Band {}'.format(bn + 1) for bn in range(count)]
# write the band names to the METADATA group, as individually
# named datasets of the form 'Band_n'; n=1..nbands
dname_fmt = 'Band_{}'
for i, bname in enumerate(band_names):
dname = dname_fmt.format(i + 1)
met.create_dataset(dname, shape=(1,), data=bname)
# necessary image collection info
hdr = fid.create_group('HEADER')
# header datasets
hdr.create_dataset('WKT', shape=(1,), data=crs_wkt)
hdr.create_dataset('SIZE', data=(width, height), dtype='uint64')
hdr.create_dataset('VERSION', shape=(1,), data=VERSION)
hdr.create_dataset('RES', data=res, dtype='float64')
hdr.create_dataset('TL', data=ul, dtype='float64')
hdr.create_dataset('ROT', data=rot, dtype='float64')
hdr.create_dataset('NUMBANDS', shape=(1,), data=count, dtype='uint16')
hdr.create_dataset('FILETYPE', shape=(1,), data=FILETYPE)
hdr.create_dataset('GENERATOR', shape=(1,), data=GENERATOR)
# flush any cached items
fid.flush()
# TODO
# maybe we could use a base class that retrieves and sets
# the various properties such as header, width, height, crs, transfrom etc
class KeaH5RDOnly(object):
def __init__(self, fid):
self._fid = fid
self.header = self._read_header()
self.width = self.header['SIZE'][0]
self.height = self.header['SIZE'][1]
self.count = self.header['NUMBANDS']
self.band_groups = self._band_groups()
self.band_datasets = self._band_datasets()
def _read_header(self):
_hdr = self._fid['HEADER']
hdr = {}
for key in _hdr:
hdr[key] = _hdr[key][:]
return hdr
def _band_groups(self):
gname_fmt = 'BAND{}'
band_groups = {}
for band in range(1, self.count + 1):
grp = gname_fmt.format(band)
band_groups[band] = self._fid[grp]
return band_groups
def _band_datasets(self):
bname_fmt = 'BAND{}/DATA'
bnd_dsets = {}
for band in range(1, self.count + 1):
dset = bname_fmt.format(band)
bnd_dsets[band] = self._fid[dset]
return bnd_dsets
@property
def crs(self):
sr = osr.SpatialReference()
sr.ImportFromWkt(self.header['WKT'][0])
crs = from_string(sr.ExportToProj4())
return crs
@property
def transform(self):
transform = [self.header['TL'][0],
self.header['RES'][0],
self.header['ROT'][0],
self.header['TL'][1],
self.header['ROT'][1],
self.header['RES'][1]]
return Affine.from_gdal(*transform)
@property
def no_data(self):
# Do we return a no data value for each band dataset???
# dict mapping or plain list. dict would allow different nan's
# for different bands
no_data = []
for bgrp in self.band_groups:
no_data.append(self.band_groups[bgrp]['NO_DATA_VAL'][0])
return no_data
@property
def dtypes(self):
dtypes = []
for bgrp in self.band_groups:
dtype = self.band_groups[bgrp]['DATATYPE'][0]
dtypes.append(KEA2NUMPYDTYPE[dtype])
return dtypes
@property
def chunks(self):
chunks = self.band_datasets[1].chunks
return chunks
@property
def metadata(self):
metadata = {}
for key in self._fid['METADATA']:
metadata[key] = self._fid[key][:]
@property
def description(self):
description = []
for bgrp in self.band_groups:
description.append(self.band_groups[bgrp]['DESCRIPTION'][0])
return description
# TODO have a dict mapping to a string equivalent
@property
def layer_useage(self):
layer_useage = []
for bgrp in self.band_groups:
layer_useage.append(self.band_groups[bgrp]['LAYER_USAGE'][0])
return layer_useage
# TODO have a dict mapping to a string equivalent
@property
def layer_type(self):
layer_type = []
for bgrp in self.band_groups:
layer_type.append(self.band_groups[bgrp]['LAYER_TYPE'][0])
return layer_type
# TODO retrieve the metadata and band names
def read(self, bands, window=None):
if isinstance(bands, collections.Sequence):
nb = len(bands)
if window is None:
data = numpy.zeros((nb, self.height, self.width),
dtype=self.dtypes[0])
for i, band in enumerate(bands):
self.band_datasets[band].read_direct(data[i])
else:
ys, ye = window[0]
xs, xe = window[1]
ysize = ye - ys
xsize = xe - xs
idx = numpy.s_[ys:ye, xs:xe]
data = numpy.zeros((nb, ysize, xsize),
dtype=self.dtypes[0])
for i, band in enumerate(bands):
self.band_datasets[band].read_direct(data[i], idx)
else:
if window is None:
data = self.band_datasets[bands][:]
else:
ys, ye = window[0]
xs, xe = window[1]
idx = numpy.s_[ys:ye, xs:xe]
data = self.band_datasets[bands][idx]
return data
# This should probably be a subclass of ReadOnly, in order to avoid
# duplicating the read function
#class KeaH5RW(object):
#
# def __init__(fid):
# def read(bands, window):
# def write(bands, window):

KEA Image Format

A h5py interface to the KEA image format

#!/usr/bin/env python
import gdal
# kea undefined is 0; not sure what the mapping to numpy is
NUMPY2KEADTYPE = {'int8': 1,
'int16': 2,
'int32': 3,
'int64': 4,
'uint8': 5,
'uin16': 6,
'uint32': 7,
'uint64': 8,
'float32': 9,
'float64': 10}
KEA2NUMPYDTYPE = {v: k for k, v in NUMPY2KEADTYPE.items()}
GDAL2KEADTYPE = {gdal.GDT_Unknown: 0,
gdal.GDT_Int16: 2,
gdal.GDT_Int32: 3,
gdal.GDT_Byte: 5,
gdal.GDT_UInt16: 6,
gdal.GDT_UInt32: 7,
gdal.GDT_Float32: 9,
gdal.GDT_Float64: 10}
KEA2GDALDTYPE = {v: k for k, v in GDAL2KEADTYPE.items()}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment