Skip to content

Instantly share code, notes, and snippets.

@aryamansharda
Created January 22, 2021 03:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aryamansharda/b04e35504e4f418411311c9324bce282 to your computer and use it in GitHub Desktop.
Save aryamansharda/b04e35504e4f418411311c9324bce282 to your computer and use it in GitHub Desktop.
LZW Encoder
import sys
from sys import argv
from struct import *
def compress():
    """Compress the file named by ``input_file_name`` with LZW.

    Reads two module-level names: ``input_file_name`` (path of the text
    file to compress) and ``maximum_table_size`` (upper bound on the number
    of dictionary entries, so every emitted code is strictly below it).

    The codes are written, each as a big-endian unsigned 16-bit integer,
    to a file whose path is the input path with its extension replaced by
    ``.lzw``.

    NOTE(review): the input is read in text mode with the platform default
    encoding, and the dictionary is seeded only with ``chr(0)``..``chr(255)``.
    A character above U+00FF therefore raises ``KeyError`` (or is silently
    dropped when it is the very last character). Confirm the intended input
    encoding against the decoder before changing this.

    Raises:
        OSError: if the input cannot be read or the output cannot be written.
        KeyError: if the input contains a character outside the seed
            dictionary (see note above).
        struct.error: if a code does not fit in 16 bits.
    """
    import os.path

    # Seed the dictionary with every single-character phrase.
    next_code = 256
    dictionary = {chr(i): i for i in range(next_code)}

    # Read everything up front so the input handle is released right away,
    # even if compression fails later.
    with open(input_file_name) as input_file:
        text = input_file.read()

    # Longest phrase matched so far, and the codes emitted so far.
    phrase = ""
    codes = []

    for symbol in text:
        candidate = phrase + symbol
        if candidate in dictionary:
            # Still a known phrase: keep extending it.
            phrase = candidate
            continue

        # Emit the known phrase (without the character that broke the match).
        codes.append(dictionary[phrase])

        # Register the new phrase only while there is room. Strict "<" keeps
        # the table at maximum_table_size entries, so the largest code is
        # maximum_table_size - 1 and fits in the configured code width.
        if len(dictionary) < maximum_table_size:
            dictionary[candidate] = next_code
            next_code += 1

        phrase = symbol

    # Flush the trailing phrase; the guard also covers empty input.
    if phrase in dictionary:
        codes.append(dictionary[phrase])

    # splitext strips only the final extension of the file name, so dots in
    # directory names (e.g. "./data/file.txt") do not truncate the path.
    output_name = os.path.splitext(input_file_name)[0] + ".lzw"
    with open(output_name, "wb") as output_file:
        # One big-endian unsigned short per code, written in a single call.
        output_file.write(pack(">%dH" % len(codes), *codes))
# Usage: compress() reads the module-level names defined below.
# Path of the text file to compress.
input_file_name = "ataleoftwocities.txt"
# Width of a code in bits; the encoder and decoder must use the same value.
code_width = 12
# Upper bound on the number of dictionary entries: 2 ** code_width.
maximum_table_size = 1 << code_width
compress()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment