Skip to content

Instantly share code, notes, and snippets.

@stuarteberg
Last active February 8, 2021 03:59
Show Gist options
  • Save stuarteberg/13bc5fba55b2c4ea88da30e67490fad7 to your computer and use it in GitHub Desktop.
Save stuarteberg/13bc5fba55b2c4ea88da30e67490fad7 to your computer and use it in GitHub Desktop.
Python code (with IPython timing) for comparing rle implementations.
#
# Python code (with IPython timing) for comparing rle implementations.
# See: https://twitter.com/double_thunk/status/1358509387044827138?s=20
#
import random
import string
import platform
from itertools import groupby
import numpy as np
import pandas as pd
def rle_numpy(text):
a = np.fromiter(text, '<U1')
starts = 1 + (a[1:] != a[:-1]).nonzero()[0]
letters = [a[0], *a[starts].tolist()]
counts = np.diff(starts, 1, 0, 0, len(a))
return [*zip(letters, counts.tolist())]
def rle_itertools(text):
return [(c, sum(1 for _ in g)) for c, g in groupby(text)]
def rle_beazley(items):
if len(items) == 0:
return []
result = []
prev_item = items[0]
count = 0
for item in items:
if item == prev_item:
count += 1
else:
result.append((prev_item, count))
prev_item = item
count = 1
if count > 0:
result.append((prev_item, count))
return result
def rle_pandas(items):
s = pd.Series(list(items))
s = s[s.ne(s.shift(-1))]
c = s.index.to_series() + 1
c -= c.shift(fill_value=0)
return list(zip(s.tolist(), c.tolist()))
counts = np.random.randint(10, size=1_000_000)
letters = [random.choice(string.ascii_lowercase) for _ in counts]
text = ''.join(l*c for l,c in zip(letters, counts))[:1_000_000]
print(f"Input head: {text[:50]}")
assert rle_itertools(text) == rle_numpy(text) == rle_beazley(text)
print("-------------------")
print(f"Testing on {platform.python_implementation()}")
print("-------------------")
print("\nrle_itertools:")
%timeit r = rle_itertools(text)
print("\nrle_numpy:")
%timeit r = rle_numpy(text)
print("\nrle_beazley:")
%timeit r = rle_beazley(text)
print("\nrle_pandas:")
%timeit r = rle_pandas(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment