Skip to content

Instantly share code, notes, and snippets.

@user3483203
Created August 6, 2018 18:11
Show Gist options
  • Save user3483203/e37990716ac6b1b01c3de461baee1004 to your computer and use it in GitHub Desktop.
Save user3483203/e37990716ac6b1b01c3de461baee1004 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
""" Find all "words" of lowercase chars in a string
Speed tests, using the timeit module, of various approaches
See https://stackoverflow.com/q/51710087
Written by Ajax1234, PM 2Ring, Kevin, and user3483203
2018.08.07
"""
import re
from string import ascii_lowercase, printable
from timeit import Timer
from timeit import repeat
from random import seed, choice
import pandas as pd
import matplotlib.pyplot as plt
# Fixed seed so the generated test data is reproducible between runs
seed(17)

# A collection of chars with lots of lowercase
# letters to use for making random words
test_chars = 5 * ascii_lowercase + printable


def randword(n):
    """ Make a random "word" of n chars.

    Every character is picked independently with random.choice from
    test_chars, so the result depends on the current state of the
    random module's generator.

    Args:
        n: Number of characters to generate.

    Returns:
        A str of n characters (the empty string when n is 0).
    """
    return ''.join([choice(test_chars) for _ in range(n)])
# Create a translation table that maps all ASCII chars
# except lowercase letters to space
bad = bytes(set(range(128)) - set(ascii_lowercase.encode()))
table = dict.fromkeys(bad, ' ')


def find_lower_pm2r(s, table=table):
    """ Translate non-lowercase chars to space, then split into words.

    Args:
        s: The string to search.
        table: Translation table passed to str.translate. The default
            maps every code point below 128 that is not an ASCII
            lowercase letter to a space; code points of 128 and above
            are not in the table and are therefore left unchanged.

    Returns:
        A list of the whitespace-separated pieces that remain after
        translation.
    """
    return s.translate(table).split()
def find_lower_pm2r_byte(s):
    """ Convert to bytes & test the ASCII code to see if it's in range.

    Args:
        s: The string to search.

    Returns:
        A list of the runs of ASCII lowercase letters found in s.
    """
    # 97..122 are the ASCII codes of 'a'..'z'; every other byte of the
    # encoded string is replaced by 32 (space) so that split() can
    # separate the words.
    return bytes(
        b if 97 <= b <= 122 else 32 for b in s.encode()
    ).decode().split()
def find_lower_ajax(s):
    """ Use a regex.

    Args:
        s: The string to search.

    Returns:
        A list of every maximal run of the characters a-z in s, in the
        order they occur.
    """
    return re.findall(r'[a-z]+', s)
def find_lower_kevin(s):
    """ Use the str.islower method.

    Every character for which islower() is false is replaced by a
    space, and the result is split on whitespace.

    Args:
        s: The string to search.

    Returns:
        A list of the runs of characters in s that pass islower().
    """
    return "".join([c if c.islower() else " " for c in s]).split()
# Set of the ASCII lowercase letters, built once for fast membership tests
lwr = set(ascii_lowercase)


def find_lower_3483203(s, lwr=lwr):
    """ Test using a set.

    Args:
        s: The string to search.
        lwr: Container of the characters to keep. Defaults to the set
            of ASCII lowercase letters.

    Returns:
        A list of the runs of characters from lwr found in s; every
        other character acts as a separator.
    """
    return ''.join([i if i in lwr else ' ' for i in s]).split()
# Every implementation under test, in the order they are verified
functions = (
    find_lower_ajax,
    find_lower_pm2r,
    find_lower_pm2r_byte,
    find_lower_kevin,
    find_lower_3483203,
)
def verify(data, verbose=False, funcs=None):
    """ Check that all functions give the same results.

    Args:
        data: The string handed to every function.
        verbose: When true, print the input and each function's result.
        funcs: Iterable of callables to compare. Defaults to the
            module-level ``functions`` tuple.

    Returns:
        True if every function returned a result equal to the first
        function's result (also True when there are fewer than two
        functions), otherwise False.
    """
    if funcs is None:
        funcs = functions
    if verbose:
        print('Verifying:', repr(data))
    results = []
    for func in funcs:
        result = func(data)
        results.append(result)
        if verbose:
            print('{:20} : {}'.format(func.__name__, result))
    # Slicing keeps this safe for an empty collection: results[0] is
    # only evaluated when there is at least a second result to compare.
    return all(u == results[0] for u in results[1:])
# Sanity check: run every implementation on a short random sample
# and print whether they all agree
datalen = 8
data = ' '.join(randword(8) for _ in range(datalen))
print(verify(data, verbose=True), '\n')
# Time and plot it!
# One row per function, one column per input size (number of random
# 8-char words joined into the test string).
res = pd.DataFrame(
    index=[func.__name__ for func in functions],
    columns=[10, 50, 100, 500, 1000],
    dtype=float,
)
for c in res.columns:
    # `s` has to be a module-level name: the timeit setup string below
    # imports it from __main__, so this only works when run as a script.
    s = ' '.join([randword(8) for _ in range(c)])
    # Verify the string that is about to be timed, not the earlier sample
    print('len', c, verify(s, False))
    for f in res.index:
        stmt = '{}(s)'.format(f)
        setp = 'from __main__ import s, lwr, bad, table, {}'.format(f)
        t = repeat(stmt, setp, repeat=3, number=50)
        print('{:20} : {:.6f}, {:.6f}, {:.6f}'.format(f, *t))
        # Mean of the repeats; plain arithmetic avoids the removed
        # `pd.np` alias.
        res.at[f, c] = sum(t) / len(t)
    print()
# Divide each column by its fastest time so the plot shows relative cost
ax = res.div(res.min()).T.plot(loglog=True)
ax.set_xlabel("N")
ax.set_ylabel("time (relative)")
plt.show()
# Output:
'''
Verifying: '3c/zpws% OO8Dtcgl u;Zdm{y. dx]JTyjb pj;+ ym\t O6d.Jbg8 f\tRxrbau z`rxnkI:'
find_lower_ajax : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
find_lower_pm2r : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
find_lower_pm2r_byte : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
find_lower_kevin : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
find_lower_3483203 : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
True
len 10 True
find_lower_ajax : 0.000287, 0.000282, 0.000281
find_lower_pm2r : 0.000175, 0.000171, 0.000171
find_lower_pm2r_byte : 0.000629, 0.000624, 0.000623
find_lower_kevin : 0.000465, 0.000460, 0.000460
find_lower_3483203 : 0.000587, 0.000323, 0.000322
len 50 True
find_lower_ajax : 0.001285, 0.001278, 0.001277
find_lower_pm2r : 0.000473, 0.000458, 0.000457
find_lower_pm2r_byte : 0.003074, 0.002953, 0.003136
find_lower_kevin : 0.002359, 0.002297, 0.002314
find_lower_3483203 : 0.001687, 0.001583, 0.001639
len 100 True
find_lower_ajax : 0.002572, 0.002569, 0.002618
find_lower_pm2r : 0.000783, 0.000754, 0.000754
find_lower_pm2r_byte : 0.006119, 0.006256, 0.006101
find_lower_kevin : 0.004519, 0.004684, 0.004902
find_lower_3483203 : 0.003161, 0.003116, 0.003112
len 500 True
find_lower_ajax : 0.012646, 0.012850, 0.013184
find_lower_pm2r : 0.003271, 0.003118, 0.003192
find_lower_pm2r_byte : 0.030948, 0.032571, 0.032342
find_lower_kevin : 0.023310, 0.023077, 0.023371
find_lower_3483203 : 0.015499, 0.015744, 0.015676
len 1000 True
find_lower_ajax : 0.025057, 0.025369, 0.023719
find_lower_pm2r : 0.006783, 0.006253, 0.006325
find_lower_pm2r_byte : 0.063372, 0.062903, 0.061966
find_lower_kevin : 0.046614, 0.047319, 0.045840
find_lower_3483203 : 0.032688, 0.031823, 0.032017
'''
@user3483203
Copy link
Author

results

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment