Skip to content

Instantly share code, notes, and snippets.

@minrk
Created August 27, 2013 18:27
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save minrk/6357188 to your computer and use it in GitHub Desktop.
Save minrk/6357188 to your computer and use it in GitHub Desktop.
"""Python implementation of Z85 85-bit encoding
Z85 encoding is a plaintext encoding for a bytestring interpreted as 32-bit integers.
Since each chunk is 32 bits, the length of the bytestring must be a multiple of 4 bytes.
See ZMQ RFC 32 for details.
"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013 Brian Granger, Min Ragan-Kelley
#
# This file is part of pyzmq
#
# Distributed under the terms of the New BSD License. The full license is in
# the file COPYING.BSD, distributed as part of this software.
#-----------------------------------------------------------------------------
import struct
# Z85CHARS is the base-85 alphabet: the byte at position n is the symbol for digit n
Z85CHARS = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#"
# Z85MAP is the inverse lookup: it maps each symbol of Z85CHARS to its digit value in [0,84]
Z85MAP = {symbol: digit for digit, symbol in enumerate(Z85CHARS)}
# Place values of the five base-85 digits in a group, most significant first
_85s = [85 ** exponent for exponent in range(4, -1, -1)]
def encode(rawbytes):
    """Encode raw bytes into Z85.

    Every 4 input bytes are read as one big-endian unsigned 32-bit integer
    and written out as 5 base-85 symbols, most significant digit first.

    Args:
        rawbytes: bytes-like object whose length is a multiple of 4.

    Returns:
        The Z85 representation as bytes (5 symbols per 4 input bytes).

    Raises:
        ValueError: if the input length is not a multiple of 4.
    """
    # Accepts only byte arrays bounded to 4 bytes
    if len(rawbytes) % 4:
        raise ValueError("length must be multiple of 4, not %i" % len(rawbytes))
    nvalues = len(rawbytes) // 4
    values = struct.unpack('>%dI' % nvalues, rawbytes)
    # Indexing bytes yields a 1-char string on Python 2 but an int on
    # Python 3, where joining those ints with b''.join() raises TypeError.
    # bytearray indexing yields ints on both, so the output is built there.
    symbols = bytearray(Z85CHARS)
    encoded = bytearray()
    for v in values:
        for offset in _85s:
            encoded.append(symbols[(v // offset) % 85])
    return bytes(encoded)
def decode(z85bytes):
    """Decode Z85 data to raw bytes.

    Every 5 symbols are combined, most significant digit first, into one
    unsigned 32-bit integer that is written out as 4 big-endian bytes.

    Args:
        z85bytes: Z85 data as bytes, or as an ASCII text string.

    Returns:
        The decoded bytes (4 bytes per 5 input symbols).

    Raises:
        ValueError: if the input length is not a multiple of 5, or if a text
            argument contains non-ASCII characters (UnicodeEncodeError).
        KeyError: if the input contains a symbol that is not in Z85MAP.
        struct.error: if a 5-symbol group encodes a value above 2**32 - 1.
    """
    # Z85 is plain ASCII, so callers often hold it as text.  On Python 3 a
    # text string indexes to 1-char strings that never match the lookup
    # table built from bytes, so text is converted to bytes first.
    # type(u"") is `str` on Python 3 and `unicode` on Python 2.
    if isinstance(z85bytes, type(u"")):
        z85bytes = z85bytes.encode('ascii')
    if len(z85bytes) % 5:
        raise ValueError("Z85 length must be multiple of 5, not %i" % len(z85bytes))
    # Floor division keeps the group count an int on Python 3 as well.
    nvalues = len(z85bytes) // 5
    values = []
    for i in range(0, len(z85bytes), 5):
        value = 0
        for j, offset in enumerate(_85s):
            value += Z85MAP[z85bytes[i + j]] * offset
        values.append(value)
    return struct.pack('>%dI' % nvalues, *values)
@remdragon
Copy link

Not sure if there would be interest in a "Z85b" protocol to implement some incompatible "improvements"

"""Python implementation of Z85b 85-bit encoding
Z85b is a variation of the ZMQ RFC 32 Z85 base-85 encoding.
The differences are:
1) it has been reversed to little-endian encoding ( to facilitate #2 below )
2) requirement for multiple of 4/5 has been eliminated.
3) decode() eliminates whitespace from input
4) decode() throws clearer exception if invalid characters are encountered

This file is a derivative work of z85.py from pyzmq; see its copyright notice below.
"""

#-----------------------------------------------------------------------------
#
#  Copyright (c) 2013 Brian Granger, Min Ragan-Kelley
#
#  This file is part of pyzmq
#
#  Distributed under the terms of the New BSD License.  The full license is in
#  the file COPYING.BSD, distributed as part of this software.
#-----------------------------------------------------------------------------

import re
import struct

# Z85CHARS is the base-85 alphabet; as a bytearray it indexes to integer byte codes
Z85CHARS = bytearray(b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#")
# Z85MAP is the inverse lookup: it maps each byte code in Z85CHARS to its digit value in [0,84]
Z85MAP = {code: digit for digit, code in enumerate(Z85CHARS)}

# Place values of the five base-85 digits in a group, least significant first
_85s = [85 ** exponent for exponent in range(5)]
# Zero bytes needed to fill a raw input up to a multiple of 4, indexed by len % 4
_epadding = [(-remainder) % 4 for remainder in range(4)]
# Symbols missing from the last 5-symbol group, indexed by len % 5
_dpadding = [(-remainder) % 5 for remainder in range(5)]

class Z85DecodeError(Exception):
	"""Raised by decode() when its input is not valid Z85b data,
	e.g. when it contains a byte outside the Z85 alphabet."""

def encode(rawbytes):
	"""Encode raw bytes of any length into Z85b.

	The input is zero-padded to a multiple of 4 bytes, each 4-byte word is
	read as a little-endian unsigned 32-bit integer and emitted as 5 base-85
	symbols (least significant digit first), and finally one symbol is
	dropped from the end for every padding byte that was added.
	"""
	data = bytearray(rawbytes)
	pad = _epadding[len(data) % 4]
	data.extend(bytearray(pad))  # bytearray(n) is n zero bytes
	# After padding the length is an exact multiple of 4.
	words = struct.unpack('<%dI' % (len(data) // 4), data)

	symbols = bytearray()
	symbols.extend(
		Z85CHARS[(word // place) % 85]
		for word in words
		for place in _85s
	)

	if pad:
		# Drop one trailing symbol per padding byte.
		del symbols[-pad:]
	return bytes(symbols)

def decode(z85bytes):
	"""Decode Z85b data back to raw bytes.

	Whitespace anywhere in the input is removed before decoding.  The
	remaining length need not be a multiple of 5: a short final group is
	decoded with its missing high-order digits taken as zero, and one byte
	is dropped from the end of the result for every missing symbol.

	Args:
		z85bytes: Z85b data as a bytes-like object or an ASCII text string.

	Returns:
		The decoded bytes.

	Raises:
		Z85DecodeError: if the input contains a non-ASCII character, a byte
			outside the Z85 alphabet, or a group whose value does not fit
			in an unsigned 32-bit integer.
	"""
	# type(u"") is `str` on Python 3 and `unicode` on Python 2.  The bytes
	# whitespace pattern below cannot be applied to text, so text input is
	# converted to bytes first.
	if isinstance(z85bytes, type(u"")):
		try:
			z85bytes = z85bytes.encode('ascii')
		except UnicodeEncodeError:
			raise Z85DecodeError("Z85 text must contain only ASCII characters")
	z85bytes = bytearray(re.sub(b'\\s+', b'', z85bytes))
	padding = _dpadding[len(z85bytes) % 5]
	values = []
	for start in range(0, len(z85bytes), 5):
		value = 0
		# zip() stops at the end of a short final group, so its missing
		# high-order digits simply contribute nothing.
		for code, offset in zip(z85bytes[start:start + 5], _85s):
			try:
				value += Z85MAP[code] * offset
			except KeyError:
				raise Z85DecodeError("Invalid byte code {!r}".format(code))
		# Five base-85 digits can exceed 32 bits; report that as a decode
		# error instead of letting struct.pack() fail with struct.error.
		if value > 0xFFFFFFFF:
			raise Z85DecodeError("Z85 group value {} does not fit in 32 bits".format(value))
		values.append(value)
	decoded = struct.pack('<%dI' % len(values), *values)
	if padding:
		# One byte is dropped per symbol missing from the final group.
		decoded = decoded[:-padding]
	return decoded

@varalgit
Copy link

varalgit commented Nov 5, 2019

@remdragon: Your version is much appreciated, I'm going to use it only for myself (to encode zipped data as a PNG image comment) so it's not a problem that it is incompatible.

The + padding in this line is not necessary, as the rawbytes array is already a multiple of 4 bytes long:
nvalues = (len(rawbytes) + padding) // 4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment