Skip to content

Instantly share code, notes, and snippets.

@espio999
Created September 2, 2023 14:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save espio999/41c542dc59d40d163201ff7f1744e7b7 to your computer and use it in GitHub Desktop.
Save espio999/41c542dc59d40d163201ff7f1744e7b7 to your computer and use it in GitHub Desktop.
chatLZMA
import lzma
import nltk
import random
# Fetch the three NLTK corpora that serve as priming text.
for corpus_name in ('reuters', 'brown', 'gutenberg'):
    nltk.download(corpus_name)

# Raw LZMA2 stream, preset 9 combined with the "extreme" flag.  The same
# filter chain must be given to both the compressor and the decompressor,
# because FORMAT_RAW carries no header describing it.
my_filters = [
    {"id": lzma.FILTER_LZMA2, "preset": 9 | lzma.PRESET_EXTREME},
]
lzc = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=my_filters)
lzd = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=my_filters)

# One loader per corpus, in the order they are fed to the compressor.
# The Brown corpus is rebuilt from its word list joined with single spaces;
# the other two are taken as raw text.
corpus_loaders = (
    lambda: nltk.corpus.reuters.raw(),
    lambda: ' '.join(nltk.corpus.brown.words()),
    lambda: nltk.corpus.gutenberg.raw(),
)

# Compress every corpus through the single stateful compressor, then flush.
compressed_parts = [lzc.compress(load().encode()) for load in corpus_loaders]

# The last 50 bytes of the flushed tail are dropped before decompression --
# presumably so the decompressor never reaches the end of the stream and
# keeps accepting further input (TODO confirm the choice of 50).
compressed_parts.append(lzc.flush()[:-50])

# Replay the compressed data through the decompressor.  The decompressed
# output is discarded; only the decompressor's internal state is wanted.
for part in compressed_parts:
    lzd.decompress(part)
# Feed 10 random bytes at a time to the already-primed decompressor `lzd`
# and print whatever it emits, numbered from 0, until the decompressor
# rejects the input.
i = 0
while True:
    try:
        # Only the decompress call is expected to fail, so keep the try
        # body limited to it.
        chunk = lzd.decompress(random.randbytes(10))
    except (lzma.LZMAError, EOFError) as e:
        # LZMAError: the random bytes were not a valid continuation of the
        # stream.  EOFError: decompress() was called after the end of the
        # stream had been reached.  Either way, report the reason and stop.
        print(e)
        break
    # The output need not be valid UTF-8; undecodable bytes are dropped.
    print(i, '\t', chunk.decode(errors="ignore"))
    i += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment