Skip to content

Instantly share code, notes, and snippets.

@tkuro11
Last active February 2, 2022 15:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tkuro11/5265da25fd296305141db600c27095d4 to your computer and use it in GitHub Desktop.
Save tkuro11/5265da25fd296305141db600c27095d4 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# runlength encoding - 4 types
def rle_naive(s):
o = _succ(s[-1]) # sentinel
_, prev, s = s.partition(s[0])
cnt = 1
for c in s+o:
if prev != c:
yield((prev, cnt))
cnt = 0
cnt += 1
prev = c
def rle_strip(s):
while s:
c = s[0]
striped = s.lstrip(c)
yield((c, len(s) - len(striped)))
s = striped
import numpy as np
def rle_numpy(s):
o = _succ(s[-1]) # sentinel
s = np.array(list(s+o))
idxs = np.append(-1, np.nonzero(s[1:] != s[:-1])[0])
return zip(s[idxs+1], idxs[1:] - idxs[:-1])
from itertools import compress
def rle_wierd(s):
o = _succ(s[-1]) # sentinel
return [(s[q],p-q) for p,q in
_pairwise(compress(range(len(s)+1),
(a!=b for a,b in _pairwise(s+o,s[0])))
)]
def rle_decode(code):
ret = ""
for c,n in code:
ret += str(c)*n
return ret
## utility routine
## (A, B, C, ..., Y, Z) -> ((0, A), (A,B), (B,C), ..., (Y,Z))
def _pairwise(seq, last = 0):
for c in seq:
yield(c, last)
last = c
# Next character in ASCII
def _succ(c):
return chr((ord(c)+1) % 255)
########## benchmark
if __name__ == "__main__":
import string
import random
def check(s):
import timeit
print(" # rle_naive" , timeit.timeit(lambda: list(rle_naive(s)), number=1000))
print(" # rle_strip" , timeit.timeit(lambda: list(rle_strip(s)), number=1000))
print(" # rle_numpy" , timeit.timeit(lambda: list(rle_numpy(s)), number=1000))
print(" # rle_wierd" , timeit.timeit(lambda: list(rle_wierd(s)), number=1000))
def generate_pattern(runlength, n = 10000):
return "".join((c*runlength for c in
random.choices(string.ascii_lowercase, k = n//runlength)))
print(rle_decode(rle_naive("***** --- run-length 1 (totally random) --- *****")))
check(generate_pattern(1))
print("-----------")
print(rle_decode(rle_strip("***** --- run-length 10 aaaa bbbb --- *****")))
check(generate_pattern(10))
print("-----------")
print(rle_decode(rle_numpy("***** --- run-length 100 aaaa bbbb --- ***** ")))
check(generate_pattern(100))
print(rle_decode(rle_wierd("***** --- run-length natural language --- ***** ")))
check(open("./scarlet_ibis.txt").read())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment