Skip to content

Instantly share code, notes, and snippets.

@kylebarron
Created September 15, 2020 00:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kylebarron/58a52871c196549e9761d59d0451a8ae to your computer and use it in GitHub Desktop.
Save kylebarron/58a52871c196549e9761d59d0451a8ae to your computer and use it in GitHub Desktop.
Example of packing bits for Sentinel-2 data
import zlib
from io import BytesIO

# pip install bitstruct
import bitstruct
import numpy as np
import rasterio
from rasterio.windows import Window
path = 's3://sentinel-cogs/sentinel-s2-l2a-cogs/2020/S2A_26PQB_20200401_0_L2A/B02.tif'
# Open the dataset in a context manager so its handle is closed as soon as the
# pixels are in memory.
with rasterio.open(path) as r:
    # Read just the top-left 256x256 window of band 1 (the file has only one
    # band) instead of reading the full raster and slicing it afterwards.
    arr = r.read(1, window=Window(0, 0, 256, 256))
# Clamp to 12 bits
arr = arr.clip(0, 4095)
# Baseline: serialize the tile in NumPy's .npy format and compress it with zlib.
buf = BytesIO()
np.save(buf, arr)
# After the write, the stream position equals the serialized size in bytes.
uncompressed_size = buf.tell()
# Rewind so the buffer can be read from the start afterwards.
buf.seek(0)
compressed_buf = zlib.compress(buf.getvalue())
compressed_size = len(compressed_buf)
# Print explicitly: a bare expression is only echoed in an interactive session
# and is silently discarded when this runs as a script.
print(uncompressed_size, compressed_size)
# 84272 compressed
# 131200 uncompressed
# 36% savings
def _pack_uint12(values):
    """Pack integers in the range 0..4095 into a dense 12-bit stream.

    Values are written most-significant bit first and back to back, so every
    pair of values occupies exactly three bytes::

        byte 0: first[11:4]
        byte 1: first[3:0] << 4 | second[11:8]
        byte 2: second[7:0]

    This is the layout that repeated ``bitstruct.pack_into('u12', ...)`` calls
    at bit offsets 0, 12, 24, ... write, computed with whole-array operations
    instead of one Python call per value.

    Args:
        values: Array-like of integers, each in ``[0, 4095]``.

    Returns:
        A ``bytearray`` of ``ceil(12 * n / 8)`` bytes. When the number of
        values is odd, the low four bits of the last byte are zero padding.

    Raises:
        ValueError: If any value does not fit in 12 unsigned bits.
    """
    values = np.asarray(values).ravel()
    n_values = values.size
    if n_values == 0:
        return bytearray()
    if values.min() < 0 or values.max() > 4095:
        raise ValueError('values must fit in 12 bits (0..4095)')

    vals = values.astype(np.uint16)
    if n_values % 2:
        # Pad to an even count so the values can be handled in pairs; the
        # padding bits are cut off again below.
        vals = np.append(vals, np.uint16(0))

    first = vals[0::2]
    second = vals[1::2]
    packed = np.empty((first.size, 3), dtype=np.uint8)
    # Every expression below is <= 255 because the inputs are <= 4095.
    packed[:, 0] = first >> 4
    packed[:, 1] = ((first & 0x0F) << 4) | (second >> 8)
    packed[:, 2] = second & 0xFF

    n_bytes = (12 * n_values + 7) // 8
    return bytearray(packed.tobytes()[:n_bytes])


flat = arr.flatten()
# 12 bits per value, rounded up to whole bytes; derived from the data rather
# than a hard-coded 256 * 256.
exp_byte_length = (12 * flat.size + 7) // 8
# Vectorized replacement for the per-pixel bitstruct.pack_into loop.
data = _pack_uint12(flat)
packed_compressed = zlib.compress(data)
# Print explicitly: a bare expression is discarded when run as a script.
print(len(packed_compressed))
# 83046
# Only a 1.5% improvement over zlib-compressing the .npy buffer directly
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment