Last active
April 9, 2022 09:35
-
-
Save Sangarshanan/c8ed58be40dace29a5ff0ea23307d66c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Huffman Puffman. | |
Visualize the Tree on https://github.com/Sangarshanan/huffman-coding | |
""" | |
import heapq | |
from collections import Counter | |
def encode_message(message): | |
""" | |
Convert the String to a Fixed 8 bit binary | |
ord(character) -> Unicode -> 08b binary | |
""" | |
return ("".join(f"{ord(i):08b}" for i in message)) | |
def decode_message(binary): | |
""" | |
Convert 8 bit binary stream to string | |
int(08 Binary, 2) -> chr(Unicode) -> String | |
""" | |
binary_stream = [binary[bit:bit+8] for bit in range(0, len(binary), 8)] | |
return str(''.join([chr(int(byte, 2)) for byte in binary_stream])) | |
""" | |
Since high frequency characters are more important | |
A perfect implementation candidate is a priority queue | |
https://docs.python.org/3/library/heapq.html | |
""" | |
class HuffmanCode(object): | |
""" | |
Generate Huffman Code | |
""" | |
def __init__(self, message): | |
self.message = message | |
self.characters = [char for char in message] | |
self.symbols = Counter(self.characters) | |
char_count = Counter(self.characters) | |
mapping = dict.fromkeys(char_count.keys(),'') | |
hq = [] | |
for key, value in char_count.items(): | |
heapq.heappush(hq, (value, key)) | |
while len(hq) > 1: | |
freq_left, character_left = heapq.heappop(hq) | |
freq_right, character_right = heapq.heappop(hq) | |
for left in character_left: | |
mapping[left] = '0' + mapping[left] | |
for right in character_right: | |
mapping[right] = '1' + mapping[right] | |
freq_sum =freq_left + freq_right | |
character_sum = f'{character_left}{character_right}' | |
heapq.heappush(hq, (freq_sum, character_sum)) | |
self.mapping = mapping | |
@property | |
def binary_map(self): | |
return self.mapping | |
def encode(self): | |
encoded_binary = '' | |
binary_map = self.mapping | |
for symbol in self.characters: | |
encoded_binary+=binary_map[symbol] | |
return encoded_binary | |
def decode(self, bits): | |
_mapping = {v: k for k, v in self.mapping.items()} | |
decoded_string = '' | |
index = 0 | |
while index < 20: | |
part = bits[:index] | |
if part in _mapping: | |
bits = bits[index:] | |
index = 0 | |
decoded_string+=_mapping[part] | |
else: | |
index+=1 | |
return decoded_string | |
all_star = """ | |
Somebody once told me the world is gonna roll me | |
I ain't the sharpest tool in the shed | |
She was looking kind of dumb with her finger and her thumb | |
In the shape of an "L" on her forehead | |
Well the years start coming and they don't stop coming | |
Fed to the rules and I hit the ground running | |
Didn't make sense not to live for fun | |
Your brain gets smart but your head gets dumb | |
So much to do, so much to see | |
So what's wrong with taking the back streets? | |
You'll never know if you don't go | |
You'll never shine if you don't glow | |
Hey now, you're an all-star, get your game on, go play | |
Hey now, you're a rock star, get the show on, get paid | |
And all that glitters is gold | |
Only shooting stars break the mold | |
It's a cool place and they say it gets colder | |
You're bundled up now, wait 'til you get older | |
But the meteor men beg to differ | |
Judging by the hole in the satellite picture | |
The ice we skate is getting pretty thin | |
The water's getting warm so you might as well swim | |
My world's on fire, how about yours? | |
That's the way I like it and I'll never get bored | |
Hey now, you're an all-star, get your game on, go play | |
Hey now, you're a rock star, get the show on, get paid | |
All that glitters is gold | |
Only shooting stars break the mold | |
Hey now, you're an all-star, get your game on, go play | |
Hey now, you're a rock star, get the show, on get paid | |
And all that glitters is gold | |
Only shooting stars | |
Somebody once asked could I spare some change for gas? | |
I need to get myself away from this place | |
I said, "Yup" what a concept | |
I could use a little fuel myself | |
And we could all use a little change | |
Well, the years start coming and they don't stop coming | |
Fed to the rules and I hit the ground running | |
Didn't make sense not to live for fun | |
Your brain gets smart but your head gets dumb | |
So much to do, so much to see | |
So what's wrong with taking the back streets? | |
You'll never know if you don't go (go!) | |
You'll never shine if you don't glow | |
Hey now, you're an all-star, get your game on, go play | |
Hey now, you're a rock star, get the show on, get paid | |
And all that glitters is gold | |
Only shooting stars break the mold | |
And all that glitters is gold | |
Only shooting stars break the mold | |
""" | |
ones_and_zeros = encode_message(all_star) | |
print("Normal Binary Coding needs",len(ones_and_zeros), "bits") | |
decoded_message = decode_message(ones_and_zeros) | |
print("Can be decoded:",decoded_message == all_star) | |
hc = HuffmanCode(all_star) | |
ones_and_zeros_huffman = hc.encode() | |
print("Huffman Coding needs",len(ones_and_zeros_huffman), "bits") | |
decoded_message_huffman = hc.decode(ones_and_zeros_huffman) | |
print("Can be decoded:",decoded_message_huffman == all_star) | |
reduction = (len(ones_and_zeros_huffman)/len(ones_and_zeros)) * 100 | |
print("Encoded bits reduced",reduction, "percent") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment