Skip to content

Instantly share code, notes, and snippets.

@lynzrand
Last active October 30, 2022 16:31
Show Gist options
  • Save lynzrand/2180dbd7defb6e6dd3b20603efe948e6 to your computer and use it in GitHub Desktop.
Save lynzrand/2180dbd7defb6e6dd3b20603efe948e6 to your computer and use it in GitHub Desktop.
用最短的代码画一只刘看山!当然弄巧成拙太长了
from typing import Any
import bitstream
liukanshan = open("liukanshan.txt", "r").read()
lks_lines = liukanshan.splitlines()
rle = []
# do run-length encoding of the lines
for line in lks_lines:
rle_line = []
run = 0
last_c = ' '
for c in line:
if c == last_c:
run += 1
else:
rle_line.append(run)
last_c = c
run = 1
if (run != 1):
rle_line.append(run)
rle.append(rle_line)
rle_single = []
for line in rle:
rle_single += line
rle_single += [0]
# Generates the huffman encoding for input
def gen_huffman(input: list[int]):
# count the number of times each value occurs
counts = {}
for i in input:
if i in counts:
counts[i] += 1
else:
counts[i] = 1
# create a list of tuples (count, value)
counts = [(counts[i], i) for i in counts]
# sort the list by count
counts.sort()
# create a list of tuples (count, value, children)
counts: list[Any] = [(i[0], i[1], None) for i in counts]
# while there are more than 1 items in the list
while len(counts) > 1:
# get the first two items
a = counts.pop(0)
b = counts.pop(0)
# create a new item with the sum of the counts
# and the children being the two items
c = (a[0] + b[0], None, (a, b))
# insert the new item into the list
counts.append(c)
# sort the list
counts.sort(key=lambda x: x[0])
# create a list of tuples (value, huffman code)
huffman = []
def traverse_tree(node: tuple, code: str):
if node[1] is not None:
huffman.append((node[1], code))
else:
traverse_tree(node[2][0], code + '0')
traverse_tree(node[2][1], code + '1')
traverse_tree(counts[0], '')
return huffman
# Generates the canonical huffman encoding for input
def gen_canonical_huffman(input: list[int]):
# huff is (value, code)
huff = gen_huffman(input)
# sort the list first by length, then by value
huff.sort(key=lambda x: (len(x[1]), x[0]))
# Each of the existing codes are replaced with a new one of the same length, using the following algorithm:
#
# - The first symbol in the list gets assigned a codeword which is the same length as the symbol's original codeword but all zeros. This will often be a single zero ('0').
# - Each subsequent symbol is assigned the next binary number in sequence, ensuring that following codes are always higher in value.
# - When you reach a longer codeword, then after incrementing, append zeros until the length of the new codeword is equal to the length of the old codeword. This can be thought of as a left shift.
canonical_huff = []
canonical_huff.append((huff[0][0], "0" * len(huff[0][1])))
for code in huff[1:]:
# get the last code
last_code = canonical_huff[-1][1]
# convert the last code to an int
last_code_i = int(last_code, 2)
# increment the last code
last_code_i += 1
def pad_zeros(code: str, length: int):
while len(code) < length:
code = "0" + code
return code
# pad the last code with zeros
this_code = pad_zeros(bin(last_code_i)[2:], len(last_code))
# append zeros until the length of the new code is equal to the length of the old code
while len(this_code) < len(code[1]):
this_code += '0'
canonical_huff.append((code[0], this_code))
return canonical_huff
def encode_canonical_huffman(huffman: list[tuple[int, str]]):
huffman.sort(key=lambda x: x[1])
encoded = []
last_len = -1
last_sym = 0
for sym, code in huffman:
if len(code) != last_len:
encoded.append(-(len(code)))
last_len = len(code)
last_sym = 0
encoded.append(sym - last_sym)
last_sym = sym
return encoded
# Also encode the input using the canonical huffman encoding
def encode_with_huffman(input: list[int],
huffman: list[tuple[int, str]]) -> bitstream.BitStream:
# sort the huffman encoding by value
huffman.sort(key=lambda x: x[0])
# create a dict of value -> code
huffman_dict = {}
for value, code in huffman:
huffman_dict[value] = code
# encode the input
bs = bitstream.BitStream()
for i in input:
huff_code = huffman_dict[i]
# print("writing", i, huff_code)
for c in huff_code:
bs.write(c == '1', bool)
return bs
def huffman_tree_to_str(encoded_huff: list[int]):
s = ""
for i in encoded_huff:
s += chr(ord('A') + i)
return s
# The main encoding is a bitstream encoded as a custom base64 table from ASCII
# code 35 to 99. Bits is encoded from lower to higher in each 6-bit number.
# Like this:
# bitstream: 001110111000101010
# ==> 001110 111000 101010
# ==> | 011100 | 000111 | 010101 |
# This is to make reading easier, since shifting right is all you need.
def base64_encode_bitstream(encoded: bitstream.BitStream):
encoded = encoded.copy()
curr = 0
offset = 0
s = ""
l = len(encoded)
while offset < l:
curr |= encoded.read(bool) << (offset % 6)
offset += 1
if offset % 6 == 0:
s += chr(curr + 35)
curr = 0
if offset % 6 != 0:
s += chr(curr + 35)
return s
huffman = gen_canonical_huffman(rle_single)
encoded_huffman = encode_canonical_huffman(huffman)
# if we use 4 bits per symbol, the length of the canonical huffman encoding is
print(huffman)
print(encoded_huffman)
print(huffman_tree_to_str(encoded_huffman))
encoded_text = encode_with_huffman(rle_single, huffman)
str_encoded = base64_encode_bitstream(encoded_text)
print(rle_single)
# print(encoded_text)
print(str_encoded)
print(len(str_encoded))
// 原始版本
#include <stdio.h>
// huffman tree encoding:
// if a node at pos [i] is positive, it is the number that the code is
// representing. else, it has two children located at [2*i] and [2*i+1]
int huffman[512];
// int encoded_huff[] = {-2, 0, -3, 2, 8, -4, 1, -5, 3, 2, 1, 3, 15, 8,
// 25,
// -6, 4, 3, 1, 3, 1, 13, 20, 11, -7, 13, 2, 2, 2, 7,
// 3, 8, -8, 20, 13, 9, 4, 1, 4, 3, 1, 3, 3};
// offset by 'A'
char *encoded_huff = ">ADH=CEB<BDEBCV[;FHBMBTM:OEDGDIY9PBDBEJBJDCEDBD";
int code_to_offset(int code, int len) {
int c = 1;
for (int i = 0; i < len; i++) {
c = c * 2 + (code >> (len - i - 1)) % 2;
}
return c;
}
/*
Given a list of symbols sorted by bit-length, the following pseudocode will
print a canonical Huffman code book:
code := 0
while more symbols do
print symbol, code
code := (code + 1) << ((bit length of the next symbol) − (current bit
length))
*/
void decode_huff() {
for (int i = 0; i < 512; i++) {
huffman[i] = -1;
}
int code = 0; // current huffman code
int sym = 0; // current symbol
int curr_len = 0; // current huffman code length
for (int i = 0; encoded_huff[i] != 0; i++) {
int c = encoded_huff[i] - 'A';
if (c < 0) {
// c is -len
curr_len = -c;
sym = 0;
code <<= 1;
} else {
sym += c;
int off = code_to_offset(code, curr_len);
huffman[off] = sym;
// printf("decode: %i, code %x, len %i\n", sym, code, curr_len);
code++;
}
}
}
/*
The main encoding is a bitstream encoded as a custom base64 table from ASCII
code 35 to 99. Bits is encoded from lower to higher in each 6-bit number.
Like this:
bitstream: 001110111000101010
==> 001110 111000 101010
==> | 011100 | 000111 | 010101 |
This is to make reading easier, since shifting right is all you need.
*/
int refill_base64(char *b64) {
int v = *b64++ - 35;
v |= (*b64++ - 35) << 6;
v |= (*b64 - 35) << 12;
return v;
}
void decode_and_print(char *data, int len) {
char *init = data + len;
int decoded = 0; // currently decoded data
int decoded_len = 0; // decoded data length (bits)
int rl = 0;
int color = 0; // 0: ' ', 1: '@'
// decode the next run-length encoding
while (1) { // outer loop: for each rle
int huff_entry = 1; // Node ID in tree
while (huffman[huff_entry] < 0) { // inner loop: for each bit
// < 0 == not leaf
if (!decoded_len) {
// refill bits
decoded = refill_base64(data);
decoded_len = 18;
data += 3;
}
int b = decoded % 2;
// printf("%i", b);
decoded >>= 1;
decoded_len--;
huff_entry = huff_entry * 2 + b;
}
int rle = huffman[huff_entry];
// printf("%i ", rle);
for (int i = 0; i < rle; i++) {
putchar(color ? '@' : ' ');
}
color = !color;
if (rle == 0) {
// line feed, reset color
color = 0;
putchar('\n');
}
if (data > init)
break;
}
}
int main() {
decode_huff();
decode_and_print(
"B(R9Sa?^'YL*OG'YPII'=\\>Y,6%=a??PE-J=_\\N(7<^W@*UA[3Yb1'2+':`0%TX;H3?>="
"F3_C3_CD$TD<(+1O7$E&=1'b/"
"Z'':LMP#EN<8='EN<(V'E>1X%D[XHV'EZE.'E&ES*'7&%.J\\C+<1*'L7S$4-(?3GOS)')@^"
"CCPb+_UR'V\\B+9Ia1FJE^(_UU_(_Y*K^8*@R]K^8*@R]K^8*`573R]8_A7%#",
225);
return 0;
}
@@@@@@@ @@@@@@@@
@@@@@@@@@@@@ @@@@@@@@@@@@@@
@@@ @@@@@@@ @@@@@@@@ @@@@@@@@@@
@@ @@@@@@@@@@ @@@@@@@@@@
@@@@ @@ @@ @@@@@@@@@@
@@@@@@@@@@@@ @@@@@@@ @@@@@@@@@
@@@ @@@@@@@@@@@ @@@@@@@@@@@@@@@@ @@@@@@@@
@@@ @@@@@@@@@@@@@@ @@@@@@
@@ @@@@@@
@@ @@@@@@
@@@ @@@
@@@ @@@@@@ @@@
@@@ @@@@@@@ @@@
@@@ @@@@@ @@@
@@@ @@@
@@@ @@@ @@
@@@ @@ @@@@@@@@
@@@ @@ @@@@@@@@
@@@ @@@ @@@@@@
@@@ @@@@ @@@@@@@@@@
@@@ @@@ @@@@@@@@@ @@@@@@@
@@@ @@@@ @@@@@@@@ @@@
@@@ @@@@ @@@@@@@ @@@
@@@ @@@@@@ @@@@@@@@@@ @@@
@@@ @@@@@@@@@@@@@ @@@
@@@ @@@@@@@ @@@
@@@ @@@
@@@@@@ @@@
@@@@@@@@@@ @@@
@@@@ @@@ @@@
@@ @@@ @@@
@ @@@ @@@
@ @@@ @@@
@@ @@ @@@
@@@ @@ @@@
@@@@@@@@@@@ @@@
@@@@@@@@ @@@
@@@@ @@@
@@@@@ @@@@
@@@@@@@ @@@@@@@@
@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@ @@@@@@
@@@@@@ @@@@@@
@@@@@@ @@@@@@
@@@@@@ @@@@@@
@@@@@@ @@@@@@
@@@@@@@@ @@@@@@@@
@@@@@@@@@ @@@@@@@@@
@@@@@@@ @@@@@@@
// 半压缩版本
char *
E = ">ADH=CEB<BDEBCV[;FHBMBTM:OEDGDIY9PBDBEJBJDCEDBD",
*B =
"B(R9Sa?^'YL*OG'YPII'=\\>Y,6%=a??PE-J=_\\N(7<^W@*UA[3Yb1'2+':`0%TX;H3?>="
"F3_C3_CD$TD<(+1O7$E&=1'b/"
"Z'':LMP#EN<8='EN<(V'E>1X%D[XHV'EZE.'E&ES*'7&%.J\\C+<1*'L7S$4-(?3GOS)')@"
"^"
"CCPb+_UR'V\\B+9Ia1FJE^(_UU_(_Y*K^8*@R]K^8*@R]K^8*`573R]8_A7%#",
*F;
d, l, r, c, h, b, i, H[512], N = 35;
main() {
i = 512;
while (--i)
H[i] = -1;
d = l = r = i = 0;
for (; E[i]; i++) {
c = E[i] - 65;
if (c < 0) {
r = -c;
l = 0;
d *= 2;
} else {
l += c;
c = 1;
for (b = 0; b < r; b++) {
c = c * 2 + (d >> (r - b - 1)) % 2;
}
H[c] = l;
d++;
}
}
F = B + 225;
d = l = r = c = 0;
while (B < F) { // outer loop: for each rle
h = 1; // Node ID in tree
while (H[h] < 0) { // inner loop: for each bit
// < 0 == not leaf
if (!l--) {
d = *B++ - N;
d |= (*B++ - N) << 6;
d |= (*B++ - N) << 12;
l = 17;
}
h = h * 2 + d % 2;
d /= 2;
}
b = H[h];
// printf("%i ", b);
for (i = 0; i < b; i++)
putchar(c ? '@' : ' ');
c = !c;
if (!b) {
c = 0;
putchar('\n');
}
}
}
// 全压缩版本
char*E=">ADH=CEB<BDEBCV[;FHBMBTM:OEDGDIY9PBDBEJBJDCEDBD",*B="B(R9Sa?^'YL*OG'YPII'=\\>Y,6%=a??PE-J=_\\N(7<^W@*UA[3Yb1'2+':`0%TX;H3?>=F3_C3_CD$TD<(+1O7$E&=1'b/Z'':LMP#EN<8='EN<(V'E>1X%D[XHV'EZE.'E&ES*'7&%.J\\C+<1*'L7S$4-(?3GOS)')@^CCPb+_UR'V\\B+9Ia1FJE^(_UU_(_Y*K^8*@R]K^8*@R]K^8*`573R]8_A7%#",*F;d,l,r,c,h,b,i,H[512];main(){i=512;while(--i)H[i]=-1;d=l=r=i=0;for(;E[i];i++){c=E[i]-65;if(c<0){r=-c;l=0;d*=2;}else{l+=c;c=1;for(int i=0;i<r;i++){c=c*2+(d>>(r-i-1))%2;}H[c]=l;d++;}}F=B+225;d=l=r=c=0;while(B<F){h=1;while(H[h]<0){if(!l--){d=*B++-35;d|=(*B++-35)<<6;d|=(*B++-35)<<12;l=17;}h=h*2+d%2;d/=2;}b=H[h];for(i=0;i<b;i++)putchar(c?'@':' ');c=!c;if(!b){c=0;putchar('\n');}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment