Created
September 15, 2020 00:45
-
-
Save kylebarron/58a52871c196549e9761d59d0451a8ae to your computer and use it in GitHub Desktop.
Example of packing 12-bit Sentinel-2 pixel values to test whether bit-packing improves zlib compression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import zlib
from io import BytesIO

# pip install bitstruct
import bitstruct
import numpy as np
import rasterio
from rasterio.windows import Window
# Experiment: does packing 12-bit Sentinel-2 samples tightly (12 bits per
# sample instead of 16) help zlib compress them any better?
path = 's3://sentinel-cogs/sentinel-s2-l2a-cogs/2020/S2A_26PQB_20200401_0_L2A/B02.tif'

# Side length of the square tile taken from the top-left corner of the raster.
tile_size = 256

# Read only the tile we need rather than the whole band, and close the dataset
# as soon as the pixels are in memory.
with rasterio.open(path) as r:
    # Only one band, so read band 1 directly as a 2-D array.
    arr = r.read(1, window=Window(0, 0, tile_size, tile_size))

# Clamp to 12 bits
arr = arr.clip(0, 4095)

# Baseline: serialize the tile as .npy (16 bits per sample) and compress it.
buf = BytesIO()
np.save(buf, arr)
raw_npy = buf.getvalue()
compressed_buf = zlib.compress(raw_npy)
print('npy bytes:', len(raw_npy))
print('npy bytes after zlib:', len(compressed_buf))
# 84272 compressed
# 131200 uncompressed
# 36% savings

# Pack every pair of 12-bit samples into 3 bytes, big-endian and MSB first,
# which is the layout bitstruct's 'u12' format produces:
#   byte 0: high 8 bits of the first sample
#   byte 1: low 4 bits of the first sample | high 4 bits of the second sample
#   byte 2: low 8 bits of the second sample
# Doing this with array operations replaces one bitstruct.pack_into call per
# sample. It relies on the sample count being even (256 * 256 is).
flat = arr.astype(np.uint16).ravel()
exp_byte_length = flat.size * 12 // 8
pairs = flat.reshape(-1, 2)
first, second = pairs[:, 0], pairs[:, 1]
packed = np.empty((pairs.shape[0], 3), dtype=np.uint8)
packed[:, 0] = first >> 4
packed[:, 1] = ((first & 0x0F) << 4) | (second >> 8)
packed[:, 2] = second & 0xFF
data = bytearray(packed.tobytes())

# Cheap sanity checks: expected length, and agreement with bitstruct on the
# first two samples.
assert len(data) == exp_byte_length
assert bytes(data[:3]) == bitstruct.pack('u12u12', int(flat[0]), int(flat[1]))

packed_compressed = zlib.compress(data)
print('packed bytes after zlib:', len(packed_compressed))
# 83046
# Only a 1.5% improvement over compressing the .npy bytes with zlib directly
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment