Created
May 11, 2010 09:51
-
-
Save thomasfedb/397132 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import sys | |
lookback = "a" * 2703 | |
front = sys.stdin.read() | |
result = "" | |
lookup = {0: " ", 1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f", 7: "g", 8: "h", | |
9: "i", 10: "j", 11: "k", 12: "l", 13: "m", 14: "n", 15: "o", 16: "p", | |
17: "q", 18: "r", 19: "s", 20: "t", 21: "u", 22: "v", 23: "w", 24: "x", | |
25: "y", 26: "z", 27: "A", 28: "B", 29: "C", 30: "D", 31: "E", 32: "F", | |
33: "G", 34: "H", 35: "I", 36: "J", 37: "K", 38: "L", 39: "M", 40: "N", | |
41: "O", 42: "P", 43: "Q", 44: "R", 45: "S", 46: "T", 47: "U", 48: "V", | |
49: "W", 50: "X", 51: "Y", 52: "Z"} | |
def cmp(a, b): | |
if a[1] == b[1]: | |
return 0 | |
if a[1] > b[1]: | |
return 1 | |
return -1 | |
freq = {' ': 725, 'B': 79, 'D': 66, 'F': 51, 'H': 81, 'J': 58, 'L': 65, 'N': 86, 'P': 80, 'R': 59, 'T': 93, 'V': 78, 'X': 98, 'Z': 47, 'b': 133, 'd': 240, 'f': 445, 'h': 374, 'j': 91, 'l': 311, 'n': 346, 'p': 291, 'r': 398, 't': 470, 'v': 118, 'x': 96, 'z': 56, '%': 1347, 'A': 65, 'C': 67, 'E': 72, 'G': 63, 'I': 89, 'K': 45, 'M': 55, 'O': 58, 'Q': 58, 'S': 89, 'U': 67, 'W': 79, 'Y': 23, 'a': 459, 'c': 228, 'e': 1155, 'g': 322, 'i': 508, 'k': 130, 'm': 187, 'o': 408, 'q': 73, 's': 421, 'u': 215, 'w': 168, 'y': 152} | |
codes = {" ": "", "a": "", "b": "", "c": "", "d": "", "e": "", "f": "", "g": "", "h": "", "i": "", "j": "", "k": "", "l": "", "m": "", "n": "", "o": "", "p": "", "q": "", "r": "", "s": "", "t": "", "u": "", "v": "", "w": "", "x": "", "y": "", "z": "", "A": "", "B": "", "C": "", "D": "", "E": "", "F": "", "G": "", "H": "", "I": "", "J": "", "K": "", "L": "", "M": "", "N": "", "O": "", "P": "", "Q": "", "R": "", "S": "", "T": "", "U": "", "V": "", "W": "", "X": "", "Y": "", "Z": "", "%": ""} | |
encLookup = {} | |
def genCodes(): | |
codes = {" ": "", "a": "", "b": "", "c": "", "d": "", "e": "", "f": "", "g": "", "h": "", "i": "", "j": "", "k": "", "l": "", "m": "", "n": "", "o": "", "p": "", "q": "", "r": "", "s": "", "t": "", "u": "", "v": "", "w": "", "x": "", "y": "", "z": "", "A": "", "B": "", "C": "", "D": "", "E": "", "F": "", "G": "", "H": "", "I": "", "J": "", "K": "", "L": "", "M": "", "N": "", "O": "", "P": "", "Q": "", "R": "", "S": "", "T": "", "U": "", "V": "", "W": "", "X": "", "Y": "", "Z": "", "%": ""} | |
#pirnt codes | |
#pirnt "gen codes" | |
q = [([k], v) for (k, v) in sorted(freq.items(), cmp)] | |
while len(q) > 1: | |
a = q[0] | |
b = q[1] | |
for i in a[0]: | |
codes[i] += "0" | |
for i in b[0]: | |
codes[i] += "1" | |
q[1] = (a[0] + b[0], a[1] + b[1]) | |
q = sorted(q[1:], cmp) | |
#pirnt codes | |
return dict([(k, v[::-1]) for (k, v) in codes.items()]) | |
reverseLookup = dict([(v, k) for (k, v) in lookup.items()]) | |
def stringFromLocation(location): | |
return lookup[location] | |
def locationFromString(string): | |
return reverseLookup[string] | |
# 54 => ab | |
def stringFromLocation2(location): | |
string = "" | |
subtract = 0 | |
string += lookup[((location - (location % 53)) / 53) ] | |
string += lookup[location % 53] | |
return string | |
# ab => 53 | |
def locationFromString2(string): | |
return reverseLookup[string[0]] * 53 + reverseLookup[string[1]] | |
while len(front) > 0: | |
#print "main loop" | |
a = 52 | |
found = False | |
pieceLengthCode = "" | |
piecePositionCode = "" | |
pieceLength = 0 | |
if len(front) > 4: | |
while a > 4 and not found: | |
if len(front) >= a: | |
#print "look loop" | |
piece = front[:a] | |
if lookback.find(piece) != -1: | |
location = lookback.find(piece) | |
#print piece, len(piece) | |
found = True | |
pieceLength = len(piece) | |
#print pieceLength == len(piece) | |
piecePositionCode = stringFromLocation2(location) | |
pieceLengthCode = stringFromLocation(a) | |
a -= 1 | |
if found: | |
#print "found" | |
#print pieceLength | |
result += ("%" + piecePositionCode + pieceLengthCode) | |
#print result | |
#print lookback | |
lookback = (lookback + front[:pieceLength])[-2703:] | |
#print lookback | |
#print front | |
if len(front) == pieceLength: | |
front = "" | |
else: | |
front = front[-(len(front) - pieceLength):] | |
#print front | |
else: | |
#print "not found" | |
#print result | |
#print front | |
result += front[0] | |
#print front[0] | |
#print result | |
lookback = (lookback + front[0])[-2703:] | |
if len(front) == 1: | |
front = "" | |
else: | |
front = front[-(len(front) - 1):] | |
found = False | |
#print result | |
firstStage = result | |
result = "" | |
for c in firstStage: | |
encLookup = genCodes() | |
result += encLookup[c] | |
freq[c] += 1 | |
sys.stdout.write(result) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment