Skip to content

Instantly share code, notes, and snippets.

@holly-hacker
Created July 7, 2018 15:00
Show Gist options
  • Save holly-hacker/52ebf0a55230bc1b2a0b5b5cd7a10baf to your computer and use it in GitHub Desktop.
Save holly-hacker/52ebf0a55230bc1b2a0b5b5cd7a10baf to your computer and use it in GitHub Desktop.
/**
 * SMAZ-style compression for short strings.
 *
 * The compressed form is a string in which each character code is one of:
 *  - 0..253: an index into the codebook below; expands to that entry.
 *  - 254:    marker; the next single character is copied verbatim.
 *  - 255:    marker; the next character's code is (run length - 1), followed
 *            by that many characters copied verbatim.
 */
namespace SMAZ {
    /** Marker code preceding exactly one verbatim character. */
    const VERBATIM_SINGLE = 254;
    /** Marker code preceding a length character and a run of verbatim characters. */
    const VERBATIM_RUN = 255;
    /** Longest verbatim run emitted: the length is stored as (length - 1) in one character. */
    const MAX_VERBATIM_LENGTH = 256;
    /** Longest codebook entry ("http://"), i.e. the longest substring worth looking up. */
    const MAX_ENTRY_LENGTH = 7;

    /**
     * Code -> text table (254 entries, codes 0..253). This is the single source
     * of truth; the text -> code lookup is derived from it so the two can never
     * disagree. Entry 40 is three spaces and entry 52 is two spaces, as in the
     * reference SMAZ codebook.
     */
    const reverseCodebook: readonly string[] = [
        " ", "the", "e", "t", "a", "of", "o", "and", "i", "n", "s", "e ", "r", " th", " t", "in", "he", "th", "h", "he ", "to", "\r\n", "l", "s ",
        "d", " a", "an", "er", "c", " o", "d ", "on", " of", "re", "of ", "t ", ", ", "is", "u", "at", "   ", "n ", "or", "which", "f", "m", "as",
        "it", "that", "\n", "was", "en", "  ", " w", "es", " an", " i", "\r", "f ", "g", "p", "nd", " s", "nd ", "ed ", "w", "ed", "http://", "for",
        "te", "ing", "y ", "The", " c", "ti", "r ", "his", "st", " in", "ar", "nt", ",", " to", "y", "ng", " h", "with", "le", "al", "to ", "b", "ou",
        "be", "were", " b", "se", "o ", "ent", "ha", "ng ", "their", "\"", "hi", "from", " f", "in ", "de", "ion", "me", "v", ".", "ve", "all", "re ",
        "ri", "ro", "is ", "co", "f t", "are", "ea", ". ", "her", " m", "er ", " p", "es ", "by", "they", "di", "ra", "ic", "not", "s, ", "d t",
        "at ", "ce", "la", "h ", "ne", "as ", "tio", "on ", "n t", "io", "we", " a ", "om", ", a", "s o", "ur", "li", "ll", "ch", "had", "this",
        "e t", "g ", "e\r\n", " wh", "ere", " co", "e o", "a ", "us", " d", "ss", "\n\r\n", "\r\n\r", "=\"", " be", " e", "s a", "ma", "one", "t t",
        "or ", "but", "el", "so", "l ", "e s", "s,", "no", "ter", " wa", "iv", "ho", "e a", " r", "hat", "s t", "ns", "ch ", "wh", "tr", "ut", "/",
        "have", "ly ", "ta", " ha", " on", "tha", "-", " l", "ati", "en ", "pe", " re", "there", "ass", "si", " fo", "wa", "ec", "our", "who", "its",
        "z", "fo", "rs", ">", "ot", "un", "<", "im", "th ", "nc", "ate", "><", "ver", "ad", " we", "ly", "ee", " n", "id", " cl", "ac", "il", "</",
        "rt", " wi", "div", "e, ", " it", "whi", " ma", "ge", "x", "e c", "men", ".com",
    ];

    /**
     * Text -> code lookup. A Map is used instead of a plain object so that
     * inherited property names such as "valueOf" are never mistaken for
     * codebook entries.
     */
    const codebook = new Map<string, number>(
        reverseCodebook.map((entry, code): [string, number] => [entry, code]),
    );

    /**
     * Compresses a string.
     *
     * At each position the longest codebook entry (up to 7 characters) that
     * matches is emitted as a single character; characters with no match are
     * collected and emitted as verbatim runs of at most 256 characters.
     *
     * @param input Text to compress.
     * @returns The compressed string; empty when `input` is empty.
     */
    export function compress(input: string): string {
        const output: string[] = [];
        let verbatim = "";
        let inputIndex = 0;

        while (inputIndex < input.length) {
            let code: number | undefined;
            // Try the longest candidate first; stop at 1 because the empty
            // string is never a codebook entry.
            let length = Math.min(MAX_ENTRY_LENGTH, input.length - inputIndex);
            for (; length >= 1; length--) {
                code = codebook.get(input.slice(inputIndex, inputIndex + length));
                if (code !== undefined)
                    break;
            }

            if (code !== undefined) {
                // Pending verbatim text must be written before the code that follows it.
                if (verbatim) {
                    flush_verbatim(verbatim, output);
                    verbatim = "";
                }
                output.push(String.fromCharCode(code));
                inputIndex += length;
            } else {
                verbatim += input[inputIndex];
                inputIndex++;
                if (verbatim.length === MAX_VERBATIM_LENGTH) {
                    flush_verbatim(verbatim, output);
                    verbatim = "";
                }
            }
        }

        if (verbatim)
            flush_verbatim(verbatim, output);
        return output.join("");
    }

    /**
     * Expands a string produced by {@link compress}.
     *
     * @param inputString Compressed text.
     * @returns The decompressed string.
     * @throws Error("Malformed SMAZ") when a verbatim marker is truncated or a
     *         character code is not a valid codebook index.
     */
    export function decompress(inputString: string): string {
        let output = "";
        let i = 0;

        while (i < inputString.length) {
            const code = inputString.charCodeAt(i);
            if (code === VERBATIM_SINGLE) {
                // The marker must be followed by the character it introduces.
                if (i + 1 >= inputString.length)
                    throw new Error("Malformed SMAZ");
                output += inputString[i + 1];
                i += 2;
            } else if (code === VERBATIM_RUN) {
                // The marker must be followed by a length character...
                if (i + 1 >= inputString.length)
                    throw new Error("Malformed SMAZ");
                const runStart = i + 2;
                const runEnd = runStart + inputString.charCodeAt(i + 1) + 1;
                // ...and by the full run that the length announces.
                if (runEnd > inputString.length)
                    throw new Error("Malformed SMAZ");
                output += inputString.slice(runStart, runEnd);
                i = runEnd;
            } else {
                const entry = reverseCodebook[code];
                // Codes outside 0..253 have no codebook entry.
                if (entry === undefined)
                    throw new Error("Malformed SMAZ");
                output += entry;
                i++;
            }
        }
        return output;
    }

    /**
     * Appends the encoded form of a verbatim run to `output`: marker 254 for a
     * single character, or marker 255 plus a (length - 1) character for longer
     * runs, followed by the text itself.
     *
     * @param verbatim Non-empty text of at most 256 characters.
     * @param output   Array of output pieces to append to.
     */
    function flush_verbatim(verbatim: string, output: string[]): void {
        if (verbatim.length > 1) {
            output.push(String.fromCharCode(VERBATIM_RUN));
            output.push(String.fromCharCode(verbatim.length - 1));
        } else {
            output.push(String.fromCharCode(VERBATIM_SINGLE));
        }
        output.push(verbatim);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment