-
-
Save oliwarner/7c5a88b175abfe1b9427 to your computer and use it in GitHub Desktop.
Set vs .startswith vs Regex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
All get the same result but Sets are the clear winner, 300× faster than a big regex, 5400× faster than iterative string comparison. | |
$ python test.py | |
Sets: 9500 | |
5 function calls in 0.003 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 0.000 0.000 :0(setprofile) | |
1 0.000 0.000 0.003 0.003 <string>:1(<module>) | |
0 0.000 0.000 profile:0(profiler) | |
1 0.000 0.000 0.003 0.003 profile:0(sets()) | |
1 0.000 0.000 0.003 0.003 test.py:11(__call__) | |
1 0.003 0.003 0.003 0.003 test.py:24(sets) | |
Startswith: 500 | |
4875255 function calls in 18.057 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 0.000 0.000 :0(setprofile) | |
4875250 8.919 0.000 8.919 0.000 :0(startswith) | |
1 0.000 0.000 18.056 18.056 <string>:1(<module>) | |
0 0.000 0.000 profile:0(profiler) | |
1 0.000 0.000 18.057 18.057 profile:0(startswith()) | |
1 0.000 0.000 18.056 18.056 test.py:11(__call__) | |
1 9.137 9.137 18.056 18.056 test.py:34(startswith) | |
Regex: 9500 | |
214564 function calls (211563 primitive calls) in 1.038 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
51509 0.118 0.000 0.118 0.000 :0(append) | |
1 0.000 0.000 0.000 0.000 :0(compile) | |
1 0.000 0.000 0.000 0.000 :0(get) | |
17507 0.027 0.000 0.027 0.000 :0(isinstance) | |
1 0.000 0.000 0.000 0.000 :0(items) | |
1 0.000 0.000 0.000 0.000 :0(join) | |
39510/39509 0.071 0.000 0.071 0.000 :0(len) | |
10000 0.052 0.000 0.052 0.000 :0(match) | |
500 0.000 0.000 0.000 0.000 :0(max) | |
2502 0.003 0.000 0.003 0.000 :0(min) | |
16000 0.062 0.000 0.062 0.000 :0(ord) | |
1 0.000 0.000 0.000 0.000 :0(setprofile) | |
1 0.000 0.000 1.038 1.038 <string>:1(<module>) | |
0 0.000 0.000 profile:0(profiler) | |
1 0.000 0.000 1.038 1.038 profile:0(regexes()) | |
1 0.000 0.000 0.970 0.970 re.py:188(compile) | |
1 0.001 0.001 0.970 0.970 re.py:226(_compile) | |
1001/1 0.166 0.000 0.348 0.348 sre_compile.py:32(_compile) | |
1 0.000 0.000 0.023 0.023 sre_compile.py:359(_compile_info) | |
2 0.000 0.000 0.000 0.000 sre_compile.py:472(isstring) | |
1 0.000 0.000 0.371 0.371 sre_compile.py:478(_code) | |
1 0.000 0.000 0.969 0.969 sre_compile.py:493(compile) | |
4 0.000 0.000 0.000 0.000 sre_parse.py:126(__len__) | |
17504 0.055 0.000 0.082 0.000 sre_parse.py:130(__getitem__) | |
16501 0.065 0.000 0.090 0.000 sre_parse.py:138(append) | |
1001/1 0.020 0.000 0.023 0.023 sre_parse.py:140(getwidth) | |
1 0.000 0.000 0.000 0.000 sre_parse.py:178(__init__) | |
18502 0.123 0.000 0.183 0.000 sre_parse.py:182(__next) | |
3500 0.009 0.000 0.028 0.000 sre_parse.py:195(match) | |
16502 0.074 0.000 0.238 0.000 sre_parse.py:201(get) | |
501/1 0.019 0.000 0.598 0.598 sre_parse.py:301(_parse_sub) | |
1000/500 0.156 0.000 0.584 0.001 sre_parse.py:379(_parse) | |
1 0.000 0.000 0.000 0.000 sre_parse.py:67(__init__) | |
1 0.000 0.000 0.598 0.598 sre_parse.py:675(parse) | |
1001 0.002 0.000 0.002 0.000 sre_parse.py:90(__init__) | |
1 0.000 0.000 1.038 1.038 test.py:11(__call__) | |
1 0.016 0.016 1.038 1.038 test.py:49(regexes) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import profile | |
class memoize: | |
# from http://avinashv.net/2008/04/python-decorators-syntactic-sugar/ | |
def __init__(self, function): | |
self.function = function | |
self.memoized = {} | |
def __call__(self, *args): | |
try: | |
return self.memoized[args] | |
except KeyError: | |
self.memoized[args] = self.function(*args) | |
return self.memoized[args] | |
dir_listing = [os.urandom(16).encode('hex') for i in range(10000)] | |
keep_files = dir_listing[1100:1350] + dir_listing[8700:8950] | |
@memoize | |
def sets(): | |
count = 0 | |
keeps = set(keep_files) | |
for f in dir_listing: | |
if not f in keeps: | |
count += 1 | |
print 'Sets: ', count | |
@memoize | |
def startswith(): | |
count = 0 | |
for f in dir_listing: | |
found = False | |
for f2 in keep_files: | |
if f.startswith(f2): | |
found = True | |
break | |
if not found: | |
count += 1 | |
print 'Startswith: ', count | |
@memoize | |
def regexes(): | |
count = 0 | |
import re | |
reg = re.compile('(?:%s)' % ')|(?:'.join(keep_files)) | |
for f in dir_listing: | |
if not reg.match(f): | |
count += 1 | |
print 'Regex:', count | |
if __name__ == '__main__': | |
profile.run('sets()') | |
profile.run('startswith()') | |
profile.run('regexes()') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment