Skip to content

Instantly share code, notes, and snippets.

@oliwarner
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oliwarner/7c5a88b175abfe1b9427 to your computer and use it in GitHub Desktop.
Save oliwarner/7c5a88b175abfe1b9427 to your computer and use it in GitHub Desktop.
Set vs .startswith vs Regex
All get the same result but Sets are the clear winner, 300× faster than a big regex, 5400× faster than iterative string comparison.
$ python test.py
Sets: 9500
5 function calls in 0.003 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.000 0.000 :0(setprofile)
1 0.000 0.000 0.003 0.003 <string>:1(<module>)
0 0.000 0.000 profile:0(profiler)
1 0.000 0.000 0.003 0.003 profile:0(sets())
1 0.000 0.000 0.003 0.003 test.py:11(__call__)
1 0.003 0.003 0.003 0.003 test.py:24(sets)
Startswith: 500
4875255 function calls in 18.057 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.000 0.000 :0(setprofile)
4875250 8.919 0.000 8.919 0.000 :0(startswith)
1 0.000 0.000 18.056 18.056 <string>:1(<module>)
0 0.000 0.000 profile:0(profiler)
1 0.000 0.000 18.057 18.057 profile:0(startswith())
1 0.000 0.000 18.056 18.056 test.py:11(__call__)
1 9.137 9.137 18.056 18.056 test.py:34(startswith)
Regex: 9500
214564 function calls (211563 primitive calls) in 1.038 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
51509 0.118 0.000 0.118 0.000 :0(append)
1 0.000 0.000 0.000 0.000 :0(compile)
1 0.000 0.000 0.000 0.000 :0(get)
17507 0.027 0.000 0.027 0.000 :0(isinstance)
1 0.000 0.000 0.000 0.000 :0(items)
1 0.000 0.000 0.000 0.000 :0(join)
39510/39509 0.071 0.000 0.071 0.000 :0(len)
10000 0.052 0.000 0.052 0.000 :0(match)
500 0.000 0.000 0.000 0.000 :0(max)
2502 0.003 0.000 0.003 0.000 :0(min)
16000 0.062 0.000 0.062 0.000 :0(ord)
1 0.000 0.000 0.000 0.000 :0(setprofile)
1 0.000 0.000 1.038 1.038 <string>:1(<module>)
0 0.000 0.000 profile:0(profiler)
1 0.000 0.000 1.038 1.038 profile:0(regexes())
1 0.000 0.000 0.970 0.970 re.py:188(compile)
1 0.001 0.001 0.970 0.970 re.py:226(_compile)
1001/1 0.166 0.000 0.348 0.348 sre_compile.py:32(_compile)
1 0.000 0.000 0.023 0.023 sre_compile.py:359(_compile_info)
2 0.000 0.000 0.000 0.000 sre_compile.py:472(isstring)
1 0.000 0.000 0.371 0.371 sre_compile.py:478(_code)
1 0.000 0.000 0.969 0.969 sre_compile.py:493(compile)
4 0.000 0.000 0.000 0.000 sre_parse.py:126(__len__)
17504 0.055 0.000 0.082 0.000 sre_parse.py:130(__getitem__)
16501 0.065 0.000 0.090 0.000 sre_parse.py:138(append)
1001/1 0.020 0.000 0.023 0.023 sre_parse.py:140(getwidth)
1 0.000 0.000 0.000 0.000 sre_parse.py:178(__init__)
18502 0.123 0.000 0.183 0.000 sre_parse.py:182(__next)
3500 0.009 0.000 0.028 0.000 sre_parse.py:195(match)
16502 0.074 0.000 0.238 0.000 sre_parse.py:201(get)
501/1 0.019 0.000 0.598 0.598 sre_parse.py:301(_parse_sub)
1000/500 0.156 0.000 0.584 0.001 sre_parse.py:379(_parse)
1 0.000 0.000 0.000 0.000 sre_parse.py:67(__init__)
1 0.000 0.000 0.598 0.598 sre_parse.py:675(parse)
1001 0.002 0.000 0.002 0.000 sre_parse.py:90(__init__)
1 0.000 0.000 1.038 1.038 test.py:11(__call__)
1 0.016 0.016 1.038 1.038 test.py:49(regexes)
import os
import profile
class memoize:
# from http://avinashv.net/2008/04/python-decorators-syntactic-sugar/
def __init__(self, function):
self.function = function
self.memoized = {}
def __call__(self, *args):
try:
return self.memoized[args]
except KeyError:
self.memoized[args] = self.function(*args)
return self.memoized[args]
dir_listing = [os.urandom(16).encode('hex') for i in range(10000)]
keep_files = dir_listing[1100:1350] + dir_listing[8700:8950]
@memoize
def sets():
count = 0
keeps = set(keep_files)
for f in dir_listing:
if not f in keeps:
count += 1
print 'Sets: ', count
@memoize
def startswith():
count = 0
for f in dir_listing:
found = False
for f2 in keep_files:
if f.startswith(f2):
found = True
break
if not found:
count += 1
print 'Startswith: ', count
@memoize
def regexes():
count = 0
import re
reg = re.compile('(?:%s)' % ')|(?:'.join(keep_files))
for f in dir_listing:
if not reg.match(f):
count += 1
print 'Regex:', count
if __name__ == '__main__':
profile.run('sets()')
profile.run('startswith()')
profile.run('regexes()')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment