IshitaTakeshi/LICENSE

## huffman.py
# The MIT License (MIT)
# Copyright (c) 2016 Ishita Takeshi

from numeric import sort_symbols
from prefixcode import isprefixcode


class AbstractNode(object):
    """Shared base for tree nodes; both attributes default to ``None``."""

    # Weight carried by the node; subclasses assign it per instance.
    value = None
    # Leaf/internal flag; subclasses assign True or False per instance.
    isleaf = None


class Node(AbstractNode):
    """Internal tree node that joins two child nodes."""

    def __init__(self, left, right):
        """Keep both children and store the sum of their ``value`` attributes.

        :param left: child reached by the "0" branch during traversal
        :param right: child reached by the "1" branch during traversal
        """
        self.left, self.right = left, right
        self.value = left.value + right.value
        self.isleaf = False


class Leaf(AbstractNode):
    """Terminal tree node holding one symbol and its probability."""

    def __init__(self, symbol, probability):
        """Record the symbol and use its probability as the node ``value``.

        :param symbol: the source symbol this leaf stands for
        :param probability: weight of the symbol, stored as ``value``
        """
        self.symbol, self.value = symbol, probability
        self.isleaf = True


def make_tree(probability):
    """Build a binary tree by repeatedly merging the two lightest nodes.

    :param probability: mapping from symbol to its weight
    :return: the single node left after all merges (a ``Leaf`` when the
        mapping has exactly one entry)
    :raises IndexError: if ``probability`` is empty
    """
    forest = [Leaf(symbol, weight) for symbol, weight in probability.items()]
    while len(forest) > 1:
        # A stable sort keeps equal-weight nodes in their current order,
        # which determines how ties are broken.
        forest = sorted(forest, key=lambda item: item.value)
        lightest, runner_up = forest[0], forest[1]
        # The merged node goes to the front, ahead of the untouched rest.
        forest = [Node(lightest, runner_up)] + forest[2:]
    return forest[0]


def traverse(node, codeword="", code=None):
    """Collect the codeword of every leaf below ``node``.

    The left branch appends "0" and the right branch appends "1" to the
    codeword accumulated so far.

    :param node: tree node exposing ``isleaf`` and either ``symbol`` (leaf)
        or ``left``/``right`` (internal node)
    :param codeword: bits accumulated on the path from the root to ``node``
    :param code: dict to fill in; a new dict is created when omitted
    :return: ``code``, mapping each leaf symbol to its codeword
    """
    # A ``{}`` default would be created once and shared by every call, so
    # symbols from an earlier tree would leak into later results.
    if code is None:
        code = {}

    if node.isleaf:
        code[node.symbol] = codeword
        return code

    traverse(node.left, codeword + "0", code)
    traverse(node.right, codeword + "1", code)
    return code


def huffman(probability):
    """Return a codeword table built from the merge tree of ``probability``.

    :param probability: mapping from symbol to its weight
    :return: new dict mapping each symbol to its codeword string; empty
        when ``probability`` is empty
    """
    # No symbols means no tree to build; return an empty table instead of
    # failing while indexing an empty node list.
    if not probability:
        return {}

    root = make_tree(probability)
    # An explicit fresh dict is passed so the result never depends on
    # traverse()'s default accumulator and repeated calls stay independent.
    return traverse(root, "", {})


def test_huffman():
    """Compare huffman() with a fixed expected table for five symbols."""
    expected = {"A": "010", "B": "011", "C": "11", "D": "00", "E": "10"}
    weights = {"A": 0.10, "B": 0.15, "C": 0.30, "D": 0.16, "E": 0.29}

    result = huffman(weights)

    assert result == expected
    assert isprefixcode(result)


# Running the module directly executes the self-test.
if __name__ == '__main__':
    test_huffman()

## LICENSE
The MIT License (MIT)

Copyright (c) 2016 Ishita Takeshi

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

## main.py
from shannon_fano import shannon_fano
from shannon_fano_elias import shannon_fano_elias
from huffman import huffman
from util import show


probability = {
    "A": 0.18, "B": 0.08, "C": 0.15, "D": 0.12,
    "E": 0.3, "F": 0.02, "G": 0.1, "H": 0.05
}

# Each entry pairs a section heading with the encoder that builds its table;
# the sections are printed in this order.
for title, encode in (
    ("Shannon-Fano coding", shannon_fano),
    ("Shannon-Fano-Elias coding",
     lambda table: shannon_fano_elias(table, sort_symbols=True)),
    ("Huffman coding", huffman),
):
    print(title)
    print("-----------------------------")
    code = encode(probability)
    show(probability, code)
    print("")

## numeric.py
# The MIT License (MIT)
# Copyright (c) 2016 Ishita Takeshi


def sort_symbols(probability):
    """Return the symbols ordered from highest to lowest probability.

    :param probability: mapping from symbol to its weight
    :return: list of symbols; equal weights keep their mapping order
    """
    def weight(symbol):
        return probability[symbol]

    return sorted(probability.keys(), key=weight, reverse=True)

## prefixcode.py
# The MIT License (MIT)
# Copyright (c) 2016 Ishita Takeshi


from copy import copy


def dangling_suffixes(code1, code2):
    """Collect what remains when a word of one set prefixes a word of the other.

    For every pair taken from the two collections (in both directions),
    if one word is a proper prefix of the other, the leftover tail of the
    longer word is added to the result.

    :param code1: iterable of strings
    :param code2: iterable of strings
    :return: set of leftover tails
    """
    def tails(words, prefix):
        # Identical words are skipped, so the empty tail is never produced.
        cut = len(prefix)
        return {
            word[cut:]
            for word in words
            if word != prefix and word.startswith(prefix)
        }

    leftovers = set()
    for prefix in code1:
        leftovers.update(tails(code2, prefix))
    for prefix in code2:
        leftovers.update(tails(code1, prefix))
    return leftovers


def isprefixcode(code):
    """Return True when no dangling suffix of ``code`` is itself a codeword.

    The set of dangling suffixes is grown until it stops changing, then
    checked for overlap with the codewords.

    NOTE(review): this fixed-point construction looks like a
    unique-decodability check rather than a strict prefix-freeness check;
    e.g. ``['0', '01', '11']`` is accepted although '0' is a prefix of
    '01'. Confirm the intended semantics against the callers.

    :param code: iterable of codeword strings (iterating a dict yields keys)
    :return: bool
    """
    codewords = set(code)
    suffixes = dangling_suffixes(codewords, codewords)
    while True:
        grown = suffixes | dangling_suffixes(codewords, suffixes)
        if grown == suffixes:
            # Nothing new was found, so the set is closed.
            break
        suffixes = grown
    return suffixes.isdisjoint(codewords)


def run(S):
    """Print whether isprefixcode() accepts the collection ``S``."""
    if isprefixcode(S):
        template = "{} is a prefix code."
    else:
        template = "{} is not a prefix code."
    print(template.format(S))


# Running the module directly reports on a handful of sample codes.
if __name__ == '__main__':
    for sample in (
        ['0', '10', '101', '1100', '1110'],
        ['0', '10', '1011', '1100', '1101'],
        ['00', '0001', '001', '0011', '011'],
        ['00', '1000', '11', '110', '1101'],
        ['0000', '0001', '001', '01', '1'],
    ):
        run(sample)

## shannon_fano.py
# The MIT License (MIT)
# Copyright (c) 2016 Ishita Takeshi
#
# The algorithm is at
# https://en.wikipedia.org/wiki/Shannon%E2%80%93Fano_coding

from numeric import sort_symbols
from prefixcode import isprefixcode


def split(probability, sorted_symbols):
    """Pick the index that best balances the weight on both sides.

    Candidate indices run from 1 up to ``len(sorted_symbols) - 2``; the
    first one with the smallest absolute difference between the two sides
    wins. With fewer than three symbols no candidate is examined and 1 is
    returned.

    :param probability: mapping from symbol to its weight
    :param sorted_symbols: sequence of symbols to divide
    :return: index ``i`` so that ``[:i]`` and ``[i:]`` are the two parts
    """
    best_index, best_gap = 1, float('inf')
    for index in range(1, len(sorted_symbols) - 1):
        head = sum(probability[k] for k in sorted_symbols[:index])
        tail = sum(probability[k] for k in sorted_symbols[index:])
        gap = abs(head - tail)
        # Strict comparison: an equally good later index never replaces
        # an earlier one.
        if gap < best_gap:
            best_index, best_gap = index, gap
    return best_index


def assign_digit(code, symbols, digit):
    """Append ``digit`` to the codeword of every symbol in ``symbols``.

    Symbols without an entry yet start from the empty string.

    :param code: dict from symbol to codeword; modified in place
    :param symbols: iterable of symbols to extend
    :param digit: string to append
    :return: the same ``code`` dict
    """
    for symbol in symbols:
        code[symbol] = code.get(symbol, "") + digit
    return code


def shannon_fano_(probability, sorted_symbols, code=None):
    """Recursively assign code digits to ``sorted_symbols``.

    The symbols are divided at the index chosen by ``split``; the part
    after the index receives "1", the part before it receives "0", and
    each part is then divided again until a single symbol remains.

    :param probability: mapping from symbol to its weight
    :param sorted_symbols: sequence of symbols to encode
    :param code: dict to extend in place; a new dict is created when omitted
    :return: ``code``, mapping each symbol to its codeword so far
    """
    # A ``{}`` default would be shared between calls, and because digits
    # are appended to existing entries a second run would extend the
    # codewords produced by the first one.
    if code is None:
        code = {}

    # Zero or one symbol cannot be divided further; the empty case must
    # stop here as well or the recursion never terminates.
    if len(sorted_symbols) <= 1:
        return code

    split_point = split(probability, sorted_symbols)
    tail = sorted_symbols[split_point:]
    head = sorted_symbols[:split_point]

    assign_digit(code, tail, "1")
    shannon_fano_(probability, tail, code)

    assign_digit(code, head, "0")
    shannon_fano_(probability, head, code)

    return code


def shannon_fano(probability):
    """Return a codeword table for ``probability`` built by recursive splitting.

    :param probability: mapping from symbol to its weight
    :return: new dict mapping symbols to codeword strings; empty when
        ``probability`` is empty
    """
    # Nothing to encode; return early rather than recursing on an empty
    # symbol list.
    if not probability:
        return {}

    # An explicit fresh dict is passed so the result never depends on the
    # default accumulator of shannon_fano_() and repeated calls stay
    # independent.
    return shannon_fano_(probability, sort_symbols(probability), {})


def test_shannon_fano():
    """Compare shannon_fano() with the two accepted tables for five symbols."""
    occurrences = {"A": 15, "B": 7, "C": 6, "D": 6, "E": 5}
    total = sum(occurrences.values())
    probability = {
        symbol: count / total for symbol, count in occurrences.items()
    }

    code = shannon_fano(probability)

    # C and D occur equally often, so either of them may get the short word
    accepted = (
        {"A": "00", "B": "01", "C": "10", "D": "110", "E": "111"},
        {"A": "00", "B": "01", "C": "110", "D": "10", "E": "111"},
    )
    assert code in accepted
    assert isprefixcode(code)


# Running the module directly executes the self-test.
if __name__ == '__main__':
    test_shannon_fano()

## shannon_fano_elias.py
# The MIT License (MIT)
# Copyright (c) 2016 Ishita Takeshi

# The algorithm is at
# https://en.wikipedia.org/wiki/Shannon%E2%80%93Fano%E2%80%93Elias_coding

from math import log2, ceil

from prefixcode import isprefixcode


def shannon_fano_elias(probability, sort_symbols=False):
    """Build a codeword table from cumulative-probability midpoints.

    For each symbol, the sum of the probabilities of all earlier symbols
    plus half of its own probability is written as a binary fraction and
    cut to ``ceil(-log2(p)) + 1`` digits.

    :param probability: mapping from symbol to its probability
    :param sort_symbols: when true, process the symbols in sorted order
        instead of mapping order
    :return: dict mapping each symbol to its codeword string
    """
    ordered = list(probability.keys())

    # sorting makes the result independent of the mapping's own order
    if sort_symbols:
        ordered.sort()

    def midpoint(index):
        # weight of everything before the symbol, plus half of its own
        below = sum(probability[k] for k in ordered[:index])
        return below + probability[ordered[index]] / 2

    def length(symbol):
        return ceil(-log2(probability[symbol])) + 1

    def binary_digits(x, n):
        x = round(x, 7)  # absorb floating-point noise before comparing
        assert(0 <= x <= 1)
        if x == 1:
            return '0' * n

        digits = []
        for exponent in range(1, n + 1):
            weight = pow(2, -exponent)
            if x >= weight:
                x -= weight
                digits.append('1')
            else:
                digits.append('0')
        return ''.join(digits)

    code = {}
    for index, symbol in enumerate(ordered):
        value = midpoint(index)
        code[symbol] = binary_digits(value, length(symbol))
    return code


def test_shannon_fano_elias():
    """Compare shannon_fano_elias() with a fixed table for four symbols."""
    expected = {"A": "001", "B": "011", "C": "1010", "D": "111"}
    weights = {"A": 1/3, "B": 1/4, "C": 1/6, "D": 1/4}

    result = shannon_fano_elias(weights, sort_symbols=True)

    assert result == expected
    assert isprefixcode(result)


# Running the module directly executes the self-test.
if __name__ == '__main__':
    test_shannon_fano_elias()

## util.py
# The MIT License (MIT)
# Copyright (c) 2016 Ishita Takeshi

def show(probability, code):
    """Print a three-column table of symbol, probability and codeword.

    Rows are printed in sorted symbol order and followed by a blank line.

    :param probability: mapping from symbol to its probability
    :param code: mapping from symbol to its codeword
    """
    row = "{}       {:.2f}         {}"
    print("Symbol  Probability  Codeword")
    for symbol in sorted(probability.keys()):
        line = row.format(symbol, probability[symbol], code[symbol])
        print(line)
    print("")
	# The MIT License (MIT)
	# Copyright (c) 2016 Ishita Takeshi

	from numeric import sort_symbols
	from prefixcode import isprefixcode


	class AbstractNode(object):
	isleaf = None
	value = None


	class Node(AbstractNode):
	def __init__(self, left, right):
	self.left = left
	self.right = right
	self.value = left.value + right.value
	self.isleaf = False


	class Leaf(AbstractNode):
	def __init__(self, symbol, probability):
	self.symbol = symbol
	self.value = probability
	self.isleaf = True


	def make_tree(probability):
	nodes = [Leaf(s, p) for s, p in probability.items()]
	while len(nodes) > 1:
	nodes.sort(key=lambda node: node.value)
	nodes = [Node(nodes[0], nodes[1])] + nodes[2:]
	return nodes[0]


	def traverse(node, codeword="", code={}):
	if node.isleaf:
	code[node.symbol] = codeword
	return code

	code = traverse(node.left, codeword+"0", code)
	code = traverse(node.right, codeword+"1", code)
	return code


	def huffman(probability):
	root = make_tree(probability)
	code = traverse(root)
	return code


	def test_huffman():
	probability = {"A": 0.10, "B": 0.15, "C": 0.30, "D": 0.16, "E": 0.29}
	code = huffman(probability)
	expected = {"A": "010", "B": "011", "C": "11", "D": "00", "E": "10"}
	assert(code == expected)
	assert(isprefixcode(code))


	if __name__ == '__main__':
	test_huffman()
	The MIT License (MIT)

	Copyright (c) 2016 Ishita Takeshi

	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	THE SOFTWARE.
	from shannon_fano import shannon_fano
	from shannon_fano_elias import shannon_fano_elias
	from huffman import huffman
	from util import show


	probability = {
	"A": 0.18, "B": 0.08, "C": 0.15, "D": 0.12,
	"E": 0.3, "F": 0.02, "G": 0.1, "H": 0.05
	}

	print("Shannon-Fano coding")
	print("-----------------------------")
	code = shannon_fano(probability)
	show(probability, code)
	print("")

	print("Shannon-Fano-Elias coding")
	print("-----------------------------")
	code = shannon_fano_elias(probability, sort_symbols=True)
	show(probability, code)
	print("")


	print("Huffman coding")
	print("-----------------------------")
	code = huffman(probability)
	show(probability, code)
	print("")
	# The MIT License (MIT)
	# Copyright (c) 2016 Ishita Takeshi


	def sort_symbols(probability):
	symbols = probability.keys()
	return sorted(symbols, key=lambda k: probability[k], reverse=True)
	# The MIT License (MIT)
	# Copyright (c) 2016 Ishita Takeshi


	from copy import copy


	def dangling_suffixes(code1, code2):
	def suffixes(code, word):
	N = len(word)
	s = set()
	for c in code:
	if c == word:
	continue
	if c.startswith(word):
	s.add(c[N:])
	return s

	ss = set()
	for c1 in code1:
	ss \|= suffixes(code2, c1)
	for c2 in code2:
	ss \|= suffixes(code1, c2)
	return ss


	def isprefixcode(code):
	S0 = set(code)
	S = dangling_suffixes(S0, S0)
	D = dangling_suffixes(S0, S)

	while S != (S \| D):
	S = S \| D
	D = dangling_suffixes(S0, S)
	return len(S & S0) == 0


	def run(S):
	if isprefixcode(S):
	print("{} is a prefix code.".format(S))
	else:
	print("{} is not a prefix code.".format(S))


	if __name__ == '__main__':
	run(['0', '10', '101', '1100', '1110'])
	run(['0', '10', '1011', '1100', '1101'])
	run(['00', '0001', '001', '0011', '011'])
	run(['00', '1000', '11', '110', '1101'])
	run(['0000', '0001', '001', '01', '1'])
	# The MIT License (MIT)
	# Copyright (c) 2016 Ishita Takeshi
	#
	# The algorithm is at
	# https://en.wikipedia.org/wiki/Shannon%E2%80%93Fano_coding

	from numeric import sort_symbols
	from prefixcode import isprefixcode


	def split(probability, sorted_symbols):
	split_point = 1
	min_diff = float('inf')
	for i in range(1, len(sorted_symbols)-1):
	left = sum(probability[k] for k in sorted_symbols[:i])
	right = sum(probability[k] for k in sorted_symbols[i:])
	diff = abs(left-right)
	if diff < min_diff:
	split_point = i
	min_diff = diff
	return split_point


	def assign_digit(code, symbols, digit):
	for symbol in symbols:
	code.setdefault(symbol, "")
	code[symbol] += digit
	return code


	def shannon_fano_(probability, sorted_symbols, code={}):
	if len(sorted_symbols) == 1:
	return code

	split_point = split(probability, sorted_symbols)
	L = sorted_symbols[split_point:]
	R = sorted_symbols[:split_point]

	assign_digit(code, L, "1")
	code = shannon_fano_(probability, L, code)

	assign_digit(code, R, "0")
	code = shannon_fano_(probability, R, code)

	return code


	def shannon_fano(probability):
	return shannon_fano_(probability, sort_symbols(probability))


	def test_shannon_fano():
	occurrences = {"A": 15, "B": 7, "C": 6, "D": 6, "E": 5}
	probability = {}
	for symbol in occurrences.keys():
	probability[symbol] = occurrences[symbol] / sum(occurrences.values())

	code = shannon_fano(probability)

	# there are 2 patterns of code since occurrences of C and D are same
	expected1 = {"A": "00", "B": "01", "C": "10", "D": "110", "E": "111"}
	expected2 = {"A": "00", "B": "01", "C": "110", "D": "10", "E": "111"}
	assert(code == expected1 or code == expected2)
	assert(isprefixcode(code))


	if __name__ == '__main__':
	test_shannon_fano()
	# The MIT License (MIT)
	# Copyright (c) 2016 Ishita Takeshi

	def show(probability, code):
	print("Symbol Probability Codeword")
	for symbol in sorted(probability.keys()):
	print("{} {:.2f} {}".format(
	symbol, probability[symbol], code[symbol]))
	print("")