# Created August 6, 2018 18:11
# Gist: user3483203/e37990716ac6b1b01c3de461baee1004
# Note (GitHub banner): this file may contain bidirectional Unicode text that
# can be interpreted or compiled differently than it appears; review it in an
# editor that reveals hidden Unicode characters.
#! /usr/bin/env python3
""" Find all "words" of lowercase chars in a string

Speed tests, using the timeit module, of various approaches

See https://stackoverflow.com/q/51710087

Written by Ajax1234, PM 2Ring, Kevin, and user3483203
2018.08.07
"""
import re
from random import seed, choice
from statistics import mean
from string import ascii_lowercase, printable
from timeit import Timer
from timeit import repeat

import matplotlib.pyplot as plt
import pandas as pd
# Fixed seed so the generated test text is reproducible between runs
seed(17)

# A collection of chars with lots of lowercase
# letters to use for making random words
test_chars = 5 * ascii_lowercase + printable


def randword(n):
    """Make a random "word" of n chars.

    Each character is picked independently with ``random.choice`` from
    ``test_chars``, so lowercase letters are over-represented but digits,
    uppercase letters, punctuation and whitespace also occur.

    Args:
        n: Number of characters to generate; 0 (or less) gives ''.

    Returns:
        A str of length ``max(n, 0)``.
    """
    return ''.join([choice(test_chars) for _ in range(n)])
# Create a translation table that maps all ASCII chars
# except lowercase letters to space
bad = bytes(set(range(128)) - set(ascii_lowercase.encode()))
table = dict.fromkeys(bad, ' ')


def find_lower_pm2r(s, table=table):
    """Translate non-lowercase chars to space, then split on whitespace.

    Args:
        s: The text to scan.
        table: Mapping of code point -> replacement passed to
            ``str.translate``. It is bound as a default argument so the
            lookup is a fast local; the function never mutates it.

    Returns:
        A list of the runs of ASCII lowercase letters found in ``s``.
        Characters that are not keys of ``table`` (with the default table,
        anything outside ASCII) are left untouched by ``str.translate`` and
        therefore stay inside the words.
    """
    return s.translate(table).split()
def find_lower_pm2r_byte(s):
    """Convert to bytes & test the ASCII code to see if it's in range.

    The string is encoded (UTF-8, the ``str.encode`` default) and every byte
    outside 97..122 ('a'..'z') is replaced by 32 (space), so the result is
    pure ASCII and always decodes cleanly.

    Args:
        s: The text to scan.

    Returns:
        A list of the runs of ASCII lowercase letters found in ``s``.
    """
    masked = bytes(b if 97 <= b <= 122 else 32 for b in s.encode())
    return masked.decode().split()
def find_lower_ajax(s):
    """Use a regex to collect every run of the letters a-z.

    Args:
        s: The text to scan.

    Returns:
        A list of the non-overlapping matches of ``[a-z]+`` in ``s``,
        in order of appearance.
    """
    return re.findall(r'[a-z]+', s)
def find_lower_kevin(s):
    """Use the str.islower method to blank out everything else.

    Args:
        s: The text to scan.

    Returns:
        A list of the runs of lowercase characters found in ``s``.
        ``str.islower`` is Unicode-aware, so non-ASCII lowercase letters
        (e.g. 'é') count as lowercase here and are kept inside words.
    """
    return "".join([c if c.islower() else " " for c in s]).split()
# The 26 ASCII lowercase letters, as a set for O(1) membership tests
lwr = set(ascii_lowercase)


def find_lower_3483203(s, lwr=lwr):
    """Test each character for membership in a set of lowercase letters.

    Args:
        s: The text to scan.
        lwr: Container of the characters to keep. It is bound as a default
            argument so the lookup is a fast local; the function never
            mutates it.

    Returns:
        A list of the runs of characters from ``lwr`` found in ``s``;
        every other character acts as a separator.
    """
    return ''.join([i if i in lwr else ' ' for i in s]).split()
# Every implementation under test; verify() compares their results
functions = (
    find_lower_ajax,
    find_lower_pm2r,
    find_lower_pm2r_byte,
    find_lower_kevin,
    find_lower_3483203,
)
def verify(data, verbose=False, funcs=None):
    """Check that all functions give the same results for ``data``.

    Args:
        data: The string handed to every function.
        verbose: When true, print the input and each function's result.
        funcs: Optional iterable of callables to compare. When omitted,
            the module-level ``functions`` tuple is used.

    Returns:
        True if every function returned a value equal to the first
        function's result (trivially True for zero or one function),
        otherwise False.
    """
    if funcs is None:
        funcs = functions
    if verbose:
        print('Verifying:', repr(data))
    results = []
    for func in funcs:
        result = func(data)
        results.append(result)
        if verbose:
            print('{:20} : {}'.format(func.__name__, result))
    # results[0] is only evaluated when there is at least a second result,
    # so an empty or single-element list is handled without unpacking errors.
    return all(u == results[0] for u in results[1:])
# Check that all functions perform correctly on a small sample:
# datalen random 8-char words joined by single spaces.
datalen = 8
data = ' '.join([randword(8) for _ in range(datalen)])
print(verify(data, True), '\n')
# Time and plot it!
# One row per implementation, one column per input size (number of
# random 8-char words in the test string). The row labels are taken from
# `functions` so the table cannot drift out of sync with what is verified.
res = pd.DataFrame(
    index=[func.__name__ for func in functions],
    columns=[10, 50, 100, 500, 1000],
    dtype=float,
)

for c in res.columns:
    s = ' '.join([randword(8) for _ in range(c)])
    # Verify the string that is about to be timed, not the earlier sample
    print('len', c, verify(s, False))
    for f in res.index:
        stmt = '{}(s)'.format(f)
        # Run the statement in this module's namespace so it sees `s` and
        # the function under test whether or not the module is __main__.
        t = repeat(stmt, globals=globals(), repeat=3, number=50)
        print('{:20} : {:.6f}, {:.6f}, {:.6f}'.format(f, *t))
        res.at[f, c] = mean(t)
    print()

# Normalise each column by its fastest entry so the plot shows relative time
ax = res.div(res.min()).T.plot(loglog=True)
ax.set_xlabel("N")
ax.set_ylabel("time (relative)")
plt.show()
# Output:
'''
Verifying: '3c/zpws% OO8Dtcgl u;Zdm{y. dx]JTyjb pj;+ ym\t O6d.Jbg8 f\tRxrbau z`rxnkI:'
find_lower_ajax      : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
find_lower_pm2r      : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
find_lower_pm2r_byte : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
find_lower_kevin     : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
find_lower_3483203   : ['c', 'zpws', 'tcgl', 'u', 'dm', 'y', 'dx', 'yjb', 'pj', 'ym', 'd', 'bg', 'f', 'xrbau', 'z', 'rxnk']
True
len 10 True
find_lower_ajax      : 0.000287, 0.000282, 0.000281
find_lower_pm2r      : 0.000175, 0.000171, 0.000171
find_lower_pm2r_byte : 0.000629, 0.000624, 0.000623
find_lower_kevin     : 0.000465, 0.000460, 0.000460
find_lower_3483203   : 0.000587, 0.000323, 0.000322
len 50 True
find_lower_ajax      : 0.001285, 0.001278, 0.001277
find_lower_pm2r      : 0.000473, 0.000458, 0.000457
find_lower_pm2r_byte : 0.003074, 0.002953, 0.003136
find_lower_kevin     : 0.002359, 0.002297, 0.002314
find_lower_3483203   : 0.001687, 0.001583, 0.001639
len 100 True
find_lower_ajax      : 0.002572, 0.002569, 0.002618
find_lower_pm2r      : 0.000783, 0.000754, 0.000754
find_lower_pm2r_byte : 0.006119, 0.006256, 0.006101
find_lower_kevin     : 0.004519, 0.004684, 0.004902
find_lower_3483203   : 0.003161, 0.003116, 0.003112
len 500 True
find_lower_ajax      : 0.012646, 0.012850, 0.013184
find_lower_pm2r      : 0.003271, 0.003118, 0.003192
find_lower_pm2r_byte : 0.030948, 0.032571, 0.032342
find_lower_kevin     : 0.023310, 0.023077, 0.023371
find_lower_3483203   : 0.015499, 0.015744, 0.015676
len 1000 True
find_lower_ajax      : 0.025057, 0.025369, 0.023719
find_lower_pm2r      : 0.006783, 0.006253, 0.006325
find_lower_pm2r_byte : 0.063372, 0.062903, 0.061966
find_lower_kevin     : 0.046614, 0.047319, 0.045840
find_lower_3483203   : 0.032688, 0.031823, 0.032017
'''
# Gist author: user3483203 (commented Aug 6, 2018)