Created
June 9, 2011 20:08
-
-
Save waylan/1017594 to your computer and use it in GitHub Desktop.
Test the speed of a few different regex methods to escape a known set of chars (as per markdown syntax).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Maps each escapable Markdown character to an HTML numeric character
# reference (entity). Not sure if I want to use entities as the replacement,
# but the choice shouldn't affect the timings.
escape_table = {
    '\\': '&#92;',  # backslash
    '`': '&#96;',   # backtick
    '*': '&#42;',   # asterisk
    '_': '&#95;',   # underscore
    '{': '&#123;',  # left curly brace
    '}': '&#125;',  # right curly brace
    '[': '&#91;',   # left square bracket
    ']': '&#93;',   # right square bracket
    '(': '&#40;',   # left parenthesis
    ')': '&#41;',   # right parenthesis
    '#': '&#35;',   # hash mark
    '+': '&#43;',   # plus sign
    '-': '&#45;',   # minus sign (hyphen)
    '.': '&#46;',   # dot
    '!': '&#33;',   # exclamation mark
}
# Sample input for every benchmark: a mix of backslash escapes that are in
# escape_table, one that is not (``\:``), and an escaped backslash (``\\``).
text = r"""Some text\: with \*escaped\* characters\. \[Foo\]\(bar\) \#baz \+blah. Who\\what? Oh yeah\!"""

# Pattern template: a literal backslash followed by one capture group whose
# body is filled in with the ``%`` operator.
RE_BASE = r'\\(%s)'
def escape(m):
    """Return the escape_table entry for the character captured in group 1.

    Raises KeyError if the captured character is not a key of escape_table.
    """
    captured = m.group(1)
    return escape_table[captured]
def test1():
    """One big alternation regex, assembled from escape_table on every call."""
    # Each key is prefixed with a backslash so it is matched literally.
    alternatives = '|'.join('\\' + key for key in escape_table)
    pattern = RE_BASE % alternatives
    return re.sub(pattern, escape, text)
def test2():
    """Separate regex for each table entry, applied one after another."""
    result = text
    # Entries are applied in the table's iteration order.
    for char, entity in escape_table.items():
        pattern = RE_BASE % ('\\' + char)
        result = re.sub(pattern, entity, result)
    return result
# Alternation regex over every key of escape_table, compiled once at import
# time so that test3 does not have to rebuild it on each call.
RE = re.compile(RE_BASE % '|'.join('\\' + key for key in escape_table))
def test3():  # <== fastest of the complex regex
    """Same substitution as test1, but using the precompiled RE."""
    escaped = RE.sub(escape, text)
    return escaped
def escape2(m):
    """Replacement callback that looks up group 1 with a try/except guard.

    Returns the escape_table entry for the character captured after the
    backslash. If that character is not in escape_table, the matched text
    is returned unchanged.
    """
    try:
        return escape_table[m.group(1)]
    except KeyError:
        # Returning None here would make re.sub drop the whole match
        # (backslash and character), silently deleting unknown escapes
        # such as ``\:``. Hand the original text back instead.
        return m.group(0)
def test4():
    """Catch-all regex (backslash + any one character) with the try/except callback."""
    pattern = r'\\(.)'
    return re.sub(pattern, escape2, text)
def escape3(m):
    """Replacement callback that checks membership before the lookup.

    Returns the escape_table entry for the character captured after the
    backslash. If that character is not in escape_table, the matched text
    is returned unchanged.
    """
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    if m.group(1) in escape_table:
        return escape_table[m.group(1)]
    # Falling off the end would return None, which makes re.sub drop the
    # whole match; return the original text so unknown escapes survive.
    return m.group(0)
def test5():
    """Catch-all regex (backslash + any one character) with the if-guarded callback."""
    pattern = r'\\(.)'
    return re.sub(pattern, escape3, text)
# Catch-all pattern, compiled once: a backslash followed by any single
# character (except a newline), captured as group 1.
SIMPLE_RE = re.compile(r'\\(.)')
def escape4(m):
    """Replacement callback that uses a single dict.get lookup.

    Returns the escape_table entry for the character captured after the
    backslash, or the matched text unchanged when the character is not in
    escape_table.
    """
    # The default matters: a bare .get() returns None on a miss, and re.sub
    # then drops the whole match, deleting unknown escapes such as ``\:``.
    return escape_table.get(m.group(1), m.group(0))
def test6():
    """Precompiled catch-all regex with the dict.get callback."""
    substitute = SIMPLE_RE.sub
    return substitute(escape4, text)
if __name__ == '__main__':
    from timeit import repeat

    n = 10000  # number of calls per timing sample

    # (label, function) pairs, in the order the results are reported.
    benchmarks = [
        ("One", test1),
        ("Precompiled", test3),
        ("Many", test2),
        ("SimpleR", test4),
        ("Simple2", test5),
        ("SimplePC", test6),
    ]

    for label, func in benchmarks:
        name = func.__name__
        timings = repeat("%s()" % name, "from __main__ import %s" % name, number=n)
        # A single pre-formatted argument keeps print(...) identical on
        # Python 2 and Python 3.
        print("%s: %s" % (label, timings))
        # Show the output of the function that was just timed, so a variant
        # that produces different text is visible next to its timing.
        print(func())
""" | |
Output:
One: [0.4685649591499173, 0.4662997493859957, 0.46501345062068733]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
Precompiled: [0.2757023256979598, 0.2745586563747848, 0.2683399602359111]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
Many: [1.891783361569178, 1.8785724498354304, 1.8913456742352093]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
SimpleR: [0.4508724802432633, 0.44857544751307366, 0.44608369772609535]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
Simple2: [0.42462196381133843, 0.4274122523515942, 0.42898751942258606]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
SimplePC: [0.30361692646985006, 0.2933059395519013, 0.2931344393764732]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment