Skip to content

Instantly share code, notes, and snippets.

@urwrstkn8mare
Last active May 23, 2022 08:23
Show Gist options
  • Save urwrstkn8mare/08bbb89cb3ce25e83315aadb682a701f to your computer and use it in GitHub Desktop.
Save urwrstkn8mare/08bbb89cb3ce25e83315aadb682a701f to your computer and use it in GitHub Desktop.
A run length encoding compression script (Comp. Sci. 2020)
# Title: Simple Run Length Encoding Implementation
# Author: Samit Shaikh
# Info: A run length encoding compression (RLE) script.
# This is just a task in my computer science class.
# For more relevant information about the actual
# thing. Just look at the top of the python-RLE-functions.py.
# And yes, RLE compression isn't the best compression
# algorithm out there.
# Purpose: To demonstrate the principles of compression
# using RLE.
# Description: The RLEC and RLED functions compress and
# decompress a binary input stream or ascii
# stream.
# Source: https://gist.github.com/urwrstkn8mare/08bbb89cb3ce25e83315aadb682a701f
# ==================== CONVERTERS =========================
# A function that converts a positive integer to a
# binary with optional padding
def dec2bin(integer: int, padding: int = None) -> str:
# If the integer parameter is not of int type
# then stop the function and return None.
if not isinstance(integer, int):
return None
# If the integer parameter is a negative
# value than stop the function and return None.
if integer < 0:
return None
# If the integer is 0 than the binary will just be 0.
binary = "0"
if integer != 0:
# Otherwise start with an empty string representing the binary.
binary = ""
while integer > 0:
# Puts the remainder of the integer divided by 2,
# in front of the binary string.
binary = str(integer % 2) + binary
# Sets the integer to the integer divided by 2
# with no remainders.
integer = integer // 2
# If the padding parameter is not None pad binary
# with the number of '0's specified in the padding
# parameter. Then return the binary variable either way.
return binary.zfill(0 if padding is None else padding)
# A function that converts a binary string to an integer.
def bin2dec(binary: str) -> int:
# Before continuing, reverse the string to make it easier
# to work with.
binary = binary[::-1]
# Loop through each bit of the binary string. If
# there is something other than a "0" or "1", then
# return None. This will stop the function. Otherwise
# just add 2 to the power of the index of the bit to
# the decimal variable only if the bit is a "1".
decimal = 0
for i, bit in enumerate(binary):
if bit != "0" and bit != "1":
return None
decimal += (2 ** i) * int(bit)
# Return the binary string converted to an integer.
return decimal
def dec2asc(integer: int) -> str:
# If the integer parameter is not an integer, than
# return None stopping the function.
if not isinstance(integer, int):
return None
# If the integer parameter is a negative integer,
# than return None.
if integer < 0:
return None
dec = integer - 32
ascii_table = (
" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN"
"OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
)
# Return None if the dec variable added to 32 is
# bigger that 31 and smaller than 127, otherwise
# return the character in the ascii table in the
# variable dec numbered position.
return None if not 31 < dec + 32 < 127 else ascii_table[dec]
# Returns the integer value of the ascii character.
def asc2dec(letter: str) -> str:
# If the letter is not a string, than return
# None, stopping the function.
if not isinstance(letter, str):
return None
# If the number of characters in the letter
# parameter is anything other than 1, return
# None, stopping the function.
if len(letter) != 1:
return None
# The ascii_table string represents all the characters
# represented with ascii.
ascii_table = (
" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN"
"OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
)
# Find the position of the letter parameter in the
# ascii_table. Add 32 to it and return it.
return ascii_table.find(letter) + 32
# ===================== END OF CONVERTERS ========================
# ===================== RUN LENGTH ECODING =======================
# A function that takes an input stream and returns a
# compressed output stream.
def RLEC(inputStream: str) -> str:
# This part is checking the input stream to make
# sure it is actually binary.
# If the input stream only has '1's and '0's than
# return None.
for bit in inputStream:
if bit != "0" and bit != "1":
return None
# This part is actually counting the consecutively
# repeated bits.
# Initialise the runs array with an empty array.
runs = []
# Loop through every bit in the input stream.
for i in range(len(inputStream)):
# if this is not the first bit, the bit is
# equal to the previous bit and the last run
# in the runs array does not have an 'of'
# value of 255 then...
if (
i != 0
and inputStream[i] == inputStream[i - 1]
and runs[-1]["number"] != 255
):
# Add one to the 'number' value of the
# last run in the runs array.
runs[-1]["number"] += 1
else:
# Otherwise, append a new run to the runs
# array with the bit as the 'of' value and
# 1 as the 'number' value.
runs.append({"number": 1, "of": inputStream[i]})
# This part is building the output stream.
# Initialise the output stream with an empty string.
outputStream = ""
# Loop through the runs array.
for run in runs:
# Convert the 'number' value of the run to a binary
# padded to 8 digits and append it to the output
# stream. Then append the 'of' value (bit) to the
# output stream.
outputStream += dec2bin(run["number"], 8) + run["of"]
# Now return the output stream that is now compressed.
return outputStream
# Takes an ascii input stream and returns a compressed
# binary output stream. Note: it could be more efficient
# if I used the simple RLE algorithm on the letters and
# not the individual bits. However, the question required
# that I use the RLEC() function.
def asciiRLEC(inputStream: str) -> str:
# If the input stream is not a string than return None.
if not isinstance(inputStream, str):
return None
# Initialise the output stream with an empty string.
outputStream = ""
# Loop through every character in the input stream
# string.
for char in inputStream:
# Get the integer value of the ascii. Than
# convert that integer to a binary string
# padded to 8 '0's. Append the binary
# string to the output stream.
outputStream += dec2bin(asc2dec(char), 8)
# Return the output stream compressed with the RLEC
# function.
return RLEC(outputStream)
# This function takes an input stream compressed with the
# RLEC() function and returns an uncompressed output stream.
def RLED(inputStream: str) -> str:
# If the input stream is not a string return None to
# indicate that there was a problem and to end the
# execution of the function.
if not isinstance(inputStream, str):
return None
# Initialise output stream with an empty string.
outputStream = ""
for i in range(int(len(inputStream) / 9)):
# Loop through the following code every 9 bits
# in the input stream.
# Calculate the index of the last bit in the run.
endOfRun = ((i + 1) * 9) - 1
outputStream += (
# Get the last bit of the run representing the
# bit to be repeated. Append that to the output
# stream...
inputStream[endOfRun]
# a number of times which you get by converting
# the first 8 bits of the run representing a
# 8-bit binary integer to an integer.
* bin2dec(inputStream[endOfRun - 8 : endOfRun])
)
# Return the output stream that has been built.
return outputStream
# Take the compressed binary input stream and
# return an uncompressed ascii output stream.
def asciiRLED(inputStream: str) -> str:
# Decode the input stream with the RLED
# function and put it in the decodedStream
# variable.
decodedStream = RLED(inputStream)
# Initialise the outputStream with an empty
# string.
outputStream = ""
for i in range(len(decodedStream) + 1):
# Get the number of characters in the
# decoded stream. Than loop the following
# that many times plus one.
# If 'i' is a multiple of 8 than...
if i % 8 == 0 and i != 0:
# Get the 8 bits of the run before the
# i position and put it in 'section'.
section = decodedStream[i - 8 : i]
# Convert the 8 bit binary to an integer,
# than convert that to an ascii character.
# Than append that to the output stream.
outputStream += dec2asc(bin2dec(section))
# Return the output stream.
return outputStream
# ====================== END OF RUN LENGTH ENCODING =======================
# ============================== MAIN LINE ================================
# The following is just to demonstrate the above functions.
def binaryTest(binary: str):
print("\n" + ("=" * 10) + ' Test 1 ' + ('='*10))
print(
"Demonstration of the principles of compression using Run Length "
"Encoding (RLE) principles. \nThis test is on a string containing"
" a binary stream.\n"
)
uncompressed_bitstream = binary
compressed_bitstream = RLEC(uncompressed_bitstream)
if uncompressed_bitstream == RLED(compressed_bitstream):
print("Original input bitstream:", uncompressed_bitstream)
print("Uncompressed length:", len(uncompressed_bitstream))
print("Compressed output bitstream:", compressed_bitstream)
print("Compressed length:", len(compressed_bitstream))
print(
"Efficiency:",
str(
(
1
- (
len(compressed_bitstream)
/ len(uncompressed_bitstream)
)
)
* 100
)
+ "%",
)
else:
print("Compression Failed :(")
print(("=" * 30) + "\n")
def asciiTest(string: str):
print("\n" + ("=" * 11) + ' Test 2 ' + ('='*11))
print(
"Demonstration of the principles of compression using Run Length "
"Encoding (RLE) principles. \nThis test is on a string containing"
" an ascii stream.\n"
)
uncompressed_datastream = string
compressed_bitstream = asciiRLEC(uncompressed_datastream)
if uncompressed_datastream == asciiRLED(compressed_bitstream):
print("Original input datastream:", uncompressed_datastream)
print(
"Uncompressed length (in bits):",
len(uncompressed_datastream) * 8,
)
print("Compressed output bitstream:", compressed_bitstream)
print("Compressed length:", len(compressed_bitstream))
print(
"Efficiency:",
str(
(
1
- (
len(compressed_bitstream)
/ (len(uncompressed_datastream) * 8)
)
)
* 100
)
+ "%",
)
else:
print("Compression Failed :(")
print(("=" * 30) + "\n")
if __name__ == "__main__":
f = open("binary-text.txt")
binaryTest('0111111111111111111111111110')
f.close()
asciiTest("this is a test string.")
# ========================== END OF MAIN LINE =============================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment