Skip to content

Instantly share code, notes, and snippets.

@migurski
Last active December 14, 2015 18:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save migurski/5132554 to your computer and use it in GitHub Desktop.
Save migurski/5132554 to your computer and use it in GitHub Desktop.
Reduce floating point precision of WKB geometries for better zlib compression.
''' Shapeless handling of WKB geometries.
Use approximate_wkb() to copy an approximate well-known binary representation of
a geometry. Along the way, reduce precision of double floating point coordinates
by replacing their three least-significant bytes with nulls. The resulting WKB
will match the original at up to 26 bits of precision, close enough for
spherical mercator zoom 18 street scale geography.
Reduced-precision WKB geometries will compress as much as 50% smaller with zlib.
See also:
http://edndoc.esri.com/arcsde/9.0/general_topics/wkb_representation.htm
http://en.wikipedia.org/wiki/Double-precision_floating-point_format
'''
from struct import unpack
from StringIO import StringIO
#
# wkbByteOrder
#
wkbXDR = 0 # Big Endian
wkbNDR = 1 # Little Endian
#
# wkbGeometryType
#
wkbPoint = 1
wkbLineString = 2
wkbPolygon = 3
wkbMultiPoint = 4
wkbMultiLineString = 5
wkbMultiPolygon = 6
wkbGeometryCollection = 7
wkbMultis = wkbMultiPoint, wkbMultiLineString, wkbMultiPolygon, wkbGeometryCollection
def copy_byte(src, dest):
''' Copy an unsigned byte between files, and return it.
'''
byte = src.read(1)
dest.write(byte)
(val, ) = unpack('B', byte)
return val
def copy_int_little(src, dest):
''' Copy a little-endian unsigned 4-byte int between files, and return it.
'''
word = src.read(4)
dest.write(word)
(val, ) = unpack('<I', word)
return val
def copy_int_big(src, dest):
''' Copy a big-endian unsigned 4-byte int between files, and return it.
'''
word = src.read(4)
dest.write(word)
(val, ) = unpack('>I', word)
return val
def approx_point_little(src, dest):
''' Copy a pair of little-endian doubles between files, truncating significands.
'''
xy = src.read(2 * 8)
dest.write('\x00\x00\x00')
dest.write(xy[-13:-8])
dest.write('\x00\x00\x00')
dest.write(xy[-5:])
def approx_point_big(src, dest):
''' Copy a pair of big-endian doubles between files, truncating significands.
'''
xy = src.read(2 * 8)
dest.write(xy[:5])
dest.write('\x00\x00\x00')
dest.write(xy[8:13])
dest.write('\x00\x00\x00')
def approx_line(src, dest, copy_int, approx_point):
'''
'''
points = copy_int(src, dest)
for i in range(points):
approx_point(src, dest)
def approx_polygon(src, dest, copy_int, approx_point):
'''
'''
rings = copy_int(src, dest)
for i in range(rings):
approx_line(src, dest, copy_int, approx_point)
def approx_geometry(src, dest):
'''
'''
end = copy_byte(src, dest)
if end == wkbNDR:
copy_int = copy_int_little
approx_point = approx_point_little
elif end == wkbXDR:
copy_int = copy_int_big
approx_point = approx_point_big
else:
raise ValueError(end)
type = copy_int(src, dest)
if type == wkbPoint:
approx_point(src, dest)
elif type == wkbLineString:
approx_line(src, dest, copy_int, approx_point)
elif type == wkbPolygon:
approx_polygon(src, dest, copy_int, approx_point)
elif type in wkbMultis:
parts = copy_int(src, dest)
for i in range(parts):
approx_geometry(src, dest)
else:
raise ValueError(type)
def approximate_wkb(wkb_in):
''' Return an approximation of the input WKB with lower-precision geometry.
'''
input, output = StringIO(wkb_in), StringIO()
approx_geometry(input, output)
wkb_out = output.getvalue()
assert len(wkb_in) == input.tell(), 'The whole WKB was not processed'
assert len(wkb_in) == len(wkb_out), 'The output WKB is the wrong length'
return wkb_out
if __name__ == '__main__':
from random import random
from math import hypot
from shapely.wkb import loads
from shapely.geometry import *
point1 = Point(random(), random())
point2 = loads(approximate_wkb(point1.wkb))
assert hypot(point1.x - point2.x, point1.y - point2.y) < 1e-8
point1 = Point(random(), random())
point2 = Point(random(), random())
point3 = point1.union(point2)
point4 = loads(approximate_wkb(point3.wkb))
assert hypot(point3.geoms[0].x - point4.geoms[0].x, point3.geoms[0].y - point4.geoms[0].y) < 1e-8
assert hypot(point3.geoms[1].x - point4.geoms[1].x, point3.geoms[1].y - point4.geoms[1].y) < 1e-8
line1 = Point(random(), random()).buffer(1 + random(), 3).exterior
line2 = loads(approximate_wkb(line1.wkb))
assert abs(1. - line2.length / line1.length) < 1e-8
line1 = Point(random(), random()).buffer(1 + random(), 3).exterior
line2 = Point(random(), random()).buffer(1 + random(), 3).exterior
line3 = MultiLineString([line1, line2])
line4 = loads(approximate_wkb(line3.wkb))
assert abs(1. - line4.length / line3.length) < 1e-8
poly1 = Point(random(), random()).buffer(1 + random(), 3)
poly2 = loads(approximate_wkb(poly1.wkb))
assert abs(1. - poly2.area / poly1.area) < 1e-8
poly1 = Point(random(), random()).buffer(2 + random(), 3)
poly2 = Point(random(), random()).buffer(1 + random(), 3)
poly3 = poly1.difference(poly2)
poly4 = loads(approximate_wkb(poly3.wkb))
assert abs(1. - poly4.area / poly3.area) < 1e-8
poly1 = Point(random(), 2 + random()).buffer(1 + random(), 3)
poly2 = Point(2 + random(), random()).buffer(1 + random(), 3)
poly3 = poly1.union(poly2)
poly4 = loads(approximate_wkb(poly3.wkb))
assert abs(1. - poly4.area / poly3.area) < 1e-8
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment