Last active
May 23, 2022 08:23
-
-
Save urwrstkn8mare/08bbb89cb3ce25e83315aadb682a701f to your computer and use it in GitHub Desktop.
A run length encoding compression script (Comp. Sci. 2020)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Title: Simple Run Length Encoding Implementation | |
# Author: Samit Shaikh | |
# Info: A run length encoding compression (RLE) script. | |
# This is just a task in my computer science class. | |
# For more relevant information about the actual | |
# thing. Just look at the top of the python-RLE-functions.py. | |
# And yes, RLE compression isn't the best compression | |
# algorithm out there. | |
# Purpose: To demonstrate the principles of compression | |
# using RLE. | |
# Description: The RLEC and RLED functions compress and | |
# decompress a binary input stream or ascii | |
# stream. | |
# Source: https://gist.github.com/urwrstkn8mare/08bbb89cb3ce25e83315aadb682a701f | |
# ==================== CONVERTERS ========================= | |
# A function that converts a positive integer to a | |
# binary with optional padding | |
def dec2bin(integer: int, padding: int = None) -> str: | |
# If the integer parameter is not of int type | |
# then stop the function and return None. | |
if not isinstance(integer, int): | |
return None | |
# If the integer parameter is a negative | |
# value than stop the function and return None. | |
if integer < 0: | |
return None | |
# If the integer is 0 than the binary will just be 0. | |
binary = "0" | |
if integer != 0: | |
# Otherwise start with an empty string representing the binary. | |
binary = "" | |
while integer > 0: | |
# Puts the remainder of the integer divided by 2, | |
# in front of the binary string. | |
binary = str(integer % 2) + binary | |
# Sets the integer to the integer divided by 2 | |
# with no remainders. | |
integer = integer // 2 | |
# If the padding parameter is not None pad binary | |
# with the number of '0's specified in the padding | |
# parameter. Then return the binary variable either way. | |
return binary.zfill(0 if padding is None else padding) | |
# A function that converts a binary string to an integer. | |
def bin2dec(binary: str) -> int: | |
# Before continuing, reverse the string to make it easier | |
# to work with. | |
binary = binary[::-1] | |
# Loop through each bit of the binary string. If | |
# there is something other than a "0" or "1", then | |
# return None. This will stop the function. Otherwise | |
# just add 2 to the power of the index of the bit to | |
# the decimal variable only if the bit is a "1". | |
decimal = 0 | |
for i, bit in enumerate(binary): | |
if bit != "0" and bit != "1": | |
return None | |
decimal += (2 ** i) * int(bit) | |
# Return the binary string converted to an integer. | |
return decimal | |
def dec2asc(integer: int) -> str: | |
# If the integer parameter is not an integer, than | |
# return None stopping the function. | |
if not isinstance(integer, int): | |
return None | |
# If the integer parameter is a negative integer, | |
# than return None. | |
if integer < 0: | |
return None | |
dec = integer - 32 | |
ascii_table = ( | |
" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN" | |
"OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" | |
) | |
# Return None if the dec variable added to 32 is | |
# bigger that 31 and smaller than 127, otherwise | |
# return the character in the ascii table in the | |
# variable dec numbered position. | |
return None if not 31 < dec + 32 < 127 else ascii_table[dec] | |
# Returns the integer value of the ascii character. | |
def asc2dec(letter: str) -> str: | |
# If the letter is not a string, than return | |
# None, stopping the function. | |
if not isinstance(letter, str): | |
return None | |
# If the number of characters in the letter | |
# parameter is anything other than 1, return | |
# None, stopping the function. | |
if len(letter) != 1: | |
return None | |
# The ascii_table string represents all the characters | |
# represented with ascii. | |
ascii_table = ( | |
" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN" | |
"OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" | |
) | |
# Find the position of the letter parameter in the | |
# ascii_table. Add 32 to it and return it. | |
return ascii_table.find(letter) + 32 | |
# ===================== END OF CONVERTERS ======================== | |
# ===================== RUN LENGTH ECODING ======================= | |
# A function that takes an input stream and returns a | |
# compressed output stream. | |
def RLEC(inputStream: str) -> str: | |
# This part is checking the input stream to make | |
# sure it is actually binary. | |
# If the input stream only has '1's and '0's than | |
# return None. | |
for bit in inputStream: | |
if bit != "0" and bit != "1": | |
return None | |
# This part is actually counting the consecutively | |
# repeated bits. | |
# Initialise the runs array with an empty array. | |
runs = [] | |
# Loop through every bit in the input stream. | |
for i in range(len(inputStream)): | |
# if this is not the first bit, the bit is | |
# equal to the previous bit and the last run | |
# in the runs array does not have an 'of' | |
# value of 255 then... | |
if ( | |
i != 0 | |
and inputStream[i] == inputStream[i - 1] | |
and runs[-1]["number"] != 255 | |
): | |
# Add one to the 'number' value of the | |
# last run in the runs array. | |
runs[-1]["number"] += 1 | |
else: | |
# Otherwise, append a new run to the runs | |
# array with the bit as the 'of' value and | |
# 1 as the 'number' value. | |
runs.append({"number": 1, "of": inputStream[i]}) | |
# This part is building the output stream. | |
# Initialise the output stream with an empty string. | |
outputStream = "" | |
# Loop through the runs array. | |
for run in runs: | |
# Convert the 'number' value of the run to a binary | |
# padded to 8 digits and append it to the output | |
# stream. Then append the 'of' value (bit) to the | |
# output stream. | |
outputStream += dec2bin(run["number"], 8) + run["of"] | |
# Now return the output stream that is now compressed. | |
return outputStream | |
# Takes an ascii input stream and returns a compressed | |
# binary output stream. Note: it could be more efficient | |
# if I used the simple RLE algorithm on the letters and | |
# not the individual bits. However, the question required | |
# that I use the RLEC() function. | |
def asciiRLEC(inputStream: str) -> str: | |
# If the input stream is not a string than return None. | |
if not isinstance(inputStream, str): | |
return None | |
# Initialise the output stream with an empty string. | |
outputStream = "" | |
# Loop through every character in the input stream | |
# string. | |
for char in inputStream: | |
# Get the integer value of the ascii. Than | |
# convert that integer to a binary string | |
# padded to 8 '0's. Append the binary | |
# string to the output stream. | |
outputStream += dec2bin(asc2dec(char), 8) | |
# Return the output stream compressed with the RLEC | |
# function. | |
return RLEC(outputStream) | |
# This function takes an input stream compressed with the | |
# RLEC() function and returns an uncompressed output stream. | |
def RLED(inputStream: str) -> str: | |
# If the input stream is not a string return None to | |
# indicate that there was a problem and to end the | |
# execution of the function. | |
if not isinstance(inputStream, str): | |
return None | |
# Initialise output stream with an empty string. | |
outputStream = "" | |
for i in range(int(len(inputStream) / 9)): | |
# Loop through the following code every 9 bits | |
# in the input stream. | |
# Calculate the index of the last bit in the run. | |
endOfRun = ((i + 1) * 9) - 1 | |
outputStream += ( | |
# Get the last bit of the run representing the | |
# bit to be repeated. Append that to the output | |
# stream... | |
inputStream[endOfRun] | |
# a number of times which you get by converting | |
# the first 8 bits of the run representing a | |
# 8-bit binary integer to an integer. | |
* bin2dec(inputStream[endOfRun - 8 : endOfRun]) | |
) | |
# Return the output stream that has been built. | |
return outputStream | |
# Take the compressed binary input stream and | |
# return an uncompressed ascii output stream. | |
def asciiRLED(inputStream: str) -> str: | |
# Decode the input stream with the RLED | |
# function and put it in the decodedStream | |
# variable. | |
decodedStream = RLED(inputStream) | |
# Initialise the outputStream with an empty | |
# string. | |
outputStream = "" | |
for i in range(len(decodedStream) + 1): | |
# Get the number of characters in the | |
# decoded stream. Than loop the following | |
# that many times plus one. | |
# If 'i' is a multiple of 8 than... | |
if i % 8 == 0 and i != 0: | |
# Get the 8 bits of the run before the | |
# i position and put it in 'section'. | |
section = decodedStream[i - 8 : i] | |
# Convert the 8 bit binary to an integer, | |
# than convert that to an ascii character. | |
# Than append that to the output stream. | |
outputStream += dec2asc(bin2dec(section)) | |
# Return the output stream. | |
return outputStream | |
# ====================== END OF RUN LENGTH ENCODING ======================= | |
# ============================== MAIN LINE ================================ | |
# The following is just to demonstrate the above functions. | |
def binaryTest(binary: str): | |
print("\n" + ("=" * 10) + ' Test 1 ' + ('='*10)) | |
print( | |
"Demonstration of the principles of compression using Run Length " | |
"Encoding (RLE) principles. \nThis test is on a string containing" | |
" a binary stream.\n" | |
) | |
uncompressed_bitstream = binary | |
compressed_bitstream = RLEC(uncompressed_bitstream) | |
if uncompressed_bitstream == RLED(compressed_bitstream): | |
print("Original input bitstream:", uncompressed_bitstream) | |
print("Uncompressed length:", len(uncompressed_bitstream)) | |
print("Compressed output bitstream:", compressed_bitstream) | |
print("Compressed length:", len(compressed_bitstream)) | |
print( | |
"Efficiency:", | |
str( | |
( | |
1 | |
- ( | |
len(compressed_bitstream) | |
/ len(uncompressed_bitstream) | |
) | |
) | |
* 100 | |
) | |
+ "%", | |
) | |
else: | |
print("Compression Failed :(") | |
print(("=" * 30) + "\n") | |
def asciiTest(string: str): | |
print("\n" + ("=" * 11) + ' Test 2 ' + ('='*11)) | |
print( | |
"Demonstration of the principles of compression using Run Length " | |
"Encoding (RLE) principles. \nThis test is on a string containing" | |
" an ascii stream.\n" | |
) | |
uncompressed_datastream = string | |
compressed_bitstream = asciiRLEC(uncompressed_datastream) | |
if uncompressed_datastream == asciiRLED(compressed_bitstream): | |
print("Original input datastream:", uncompressed_datastream) | |
print( | |
"Uncompressed length (in bits):", | |
len(uncompressed_datastream) * 8, | |
) | |
print("Compressed output bitstream:", compressed_bitstream) | |
print("Compressed length:", len(compressed_bitstream)) | |
print( | |
"Efficiency:", | |
str( | |
( | |
1 | |
- ( | |
len(compressed_bitstream) | |
/ (len(uncompressed_datastream) * 8) | |
) | |
) | |
* 100 | |
) | |
+ "%", | |
) | |
else: | |
print("Compression Failed :(") | |
print(("=" * 30) + "\n") | |
if __name__ == "__main__": | |
f = open("binary-text.txt") | |
binaryTest('0111111111111111111111111110') | |
f.close() | |
asciiTest("this is a test string.") | |
# ========================== END OF MAIN LINE ============================= |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment