Skip to content

Instantly share code, notes, and snippets.

@danthedaniel
Last active February 3, 2019 01:09
Show Gist options
  • Save danthedaniel/350124ca1ae8b5e5ca3f9436055e0512 to your computer and use it in GitHub Desktop.
Save danthedaniel/350124ca1ae8b5e5ca3f9436055e0512 to your computer and use it in GitHub Desktop.
Python Run-Length Encoder
import sys
from dataclasses import dataclass
from itertools import groupby
from typing import List
from tqdm import tqdm
@dataclass
class Run:
    """A single run of the encoding: ``length`` repetitions of ``value``.

    In encoded form a run occupies exactly two bytes: the length byte
    followed by the value byte.
    """

    value: bytes  # The repeated byte, held as a length-1 bytes object.
    length: int  # Must be 0..255 so that it fits in one encoded byte.

    def decode(self) -> bytes:
        """Return the expanded run: ``value`` repeated ``length`` times."""
        return self.length * self.value

    def encode(self) -> bytes:
        """Return the two-byte encoded form: length byte, then value byte.

        Raises:
            ValueError: If ``length`` is outside 0..255, or if ``value`` is
                not exactly one byte long (either would produce a record
                that cannot be read back as a length/value pair).
        """
        if not 0 <= self.length <= 255:
            raise ValueError(
                "Run length must be in 0..255, got {}".format(self.length)
            )
        if len(self.value) != 1:
            raise ValueError(
                "Run value must be exactly one byte, got {}".format(
                    len(self.value)
                )
            )
        return bytes([self.length]) + self.value
def chunks(n: int, chunk_size: int) -> List[int]:
    """Break an integer into a list of chunks of a maximum size.

    The returned chunks sum to ``n``. Every chunk is between 1 and
    ``chunk_size`` inclusive; no zero-sized chunk is produced, so an exact
    multiple of ``chunk_size`` yields only full chunks and ``n == 0`` yields
    an empty list.

    Args:
        n: The non-negative total to split.
        chunk_size: The positive maximum size of any single chunk.

    Returns:
        As many full ``chunk_size`` entries as fit, followed by the
        remainder when it is non-zero.
    """
    full_count, remainder = divmod(n, chunk_size)
    result = [chunk_size] * full_count
    # Only add the remainder when there is one; a trailing 0 would be an
    # empty chunk that contributes nothing to the sum.
    if remainder:
        result.append(remainder)
    return result
def compress(data: bytes) -> bytes:
    """Perform run-length encoding on a series of bytes.

    Consecutive identical bytes are grouped into runs, and each run is
    written as a (length, value) byte pair. Progress is reported through
    ``tqdm``, one tick per group of identical bytes.

    Args:
        data: The raw bytes to compress.

    Returns:
        The encoded bytes, a sequence of two-byte (length, value) records.
    """
    # A bytearray is extended in place; repeatedly concatenating immutable
    # bytes objects would copy the whole output on every append.
    output = bytearray()

    for value, group in tqdm(groupby(data)):
        # Iterating bytes yields ints; convert back to a length-1 bytes.
        run_value = bytes([value])
        # Count the group lazily instead of materialising it as a list.
        run_length = sum(1 for _ in group)

        # Each run can only be 255 bytes long, so the full run length is
        # broken into chunks with a max size of 255.
        for chunk_length in chunks(run_length, 255):
            # A zero-length run would cost two bytes and decode to nothing.
            if chunk_length == 0:
                continue
            output += Run(run_value, chunk_length).encode()

    return bytes(output)
def decompress(data: bytes) -> bytes:
    """Decompress run-length encoded data.

    Args:
        data: Encoded bytes made of two-byte (length, value) records.

    Returns:
        The expanded bytes.

    Raises:
        ValueError: If ``data`` has an odd number of bytes and therefore
            cannot consist solely of (length, value) pairs.
    """
    if len(data) % 2 == 1:
        raise ValueError("Data must have even number of bytes.")

    # A bytearray is extended in place; repeatedly concatenating immutable
    # bytes objects would copy the whole output on every append.
    output = bytearray()

    # Data is encoded in pairs of bytes
    for offset in range(0, len(data), 2):
        # First byte is length
        length = data[offset]
        # Next byte is the actual value. A slice is used to keep the byte type.
        value = data[offset + 1:offset + 2]
        output += Run(value, length).decode()

    return bytes(output)
def main():
    """Command-line entry point: compress or decompress a file.

    Expects exactly three arguments in ``sys.argv``: the mode (``-c`` to
    compress, ``-d`` to decompress), the input path and the output path.

    Exits with status 1 when the argument count is wrong or the mode is
    unknown. The output file is not opened in either of those cases.
    """
    if len(sys.argv) != 4:
        print("Usage: compressor <mode> <input> <output>")
        print("")
        print("Mode: -c OR -d")
        sys.exit(1)

    mode, source_path, dest_path = sys.argv[1:]

    # Validate the mode before touching any file, so that a typo cannot
    # truncate an existing output file.
    if mode == "-c":
        transform = compress
    elif mode == "-d":
        transform = decompress
    else:
        print("Unknown mode {}".format(mode))
        sys.exit(1)

    with open(source_path, "rb") as source:
        payload = source.read()

    # Transform before opening the output: if the input is malformed the
    # exception propagates without the output file having been truncated.
    result = transform(payload)

    with open(dest_path, "wb") as dest:
        dest.write(result)
# Run the command-line interface only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment