Skip to content

Instantly share code, notes, and snippets.

@waylan
Created June 9, 2011 20:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save waylan/1017594 to your computer and use it in GitHub Desktop.
Save waylan/1017594 to your computer and use it in GitHub Desktop.
Test the speed of a few different regex methods to escape a known set of chars (as per markdown syntax).
import re
# This uses HTML entities as the replacements. Not sure if I want to use these,
# but the choice shouldn't affect the timings.
# Maps each backslash-escapable markdown char to its numeric HTML character
# reference (&#NN;, where NN is the char's code point). The values must be
# written as entities in the source; the raw chars would make each
# substitution a no-op.
escape_table = {
    '\\': '&#92;',   # backslash
    '`': '&#96;',    # backtick
    '*': '&#42;',    # asterisk
    '_': '&#95;',    # underscore
    '{': '&#123;',   # left curly brace
    '}': '&#125;',   # right curly brace
    '[': '&#91;',    # left square bracket
    ']': '&#93;',    # right square bracket
    '(': '&#40;',    # left parenthesis
    ')': '&#41;',    # right parenthesis
    '#': '&#35;',    # hash mark
    '+': '&#43;',    # plus sign
    '-': '&#45;',    # minus sign (hyphen)
    '.': '&#46;',    # dot
    '!': '&#33;',    # exclamation mark
}
# Sample input for every benchmark: backslash-escaped markdown chars, plus
# `\:` whose char (':') is not a key of escape_table.
text = r"""Some text\: with \*escaped\* characters\. \[Foo\]\(bar\) \#baz \+blah. Who\\what? Oh yeah\!"""
# Pattern template: a literal backslash followed by one capture group; the
# %s placeholder is filled in with the alternative(s) to capture.
RE_BASE = r'\\(%s)'
def escape(m):
    """Replacement callback: look up the char captured in group 1 of `m`.

    Returns the matching value from escape_table; raises KeyError when the
    captured char is not a key of the table.
    """
    captured = m.group(1)
    return escape_table[captured]
def test1():
    """One big regex: a single alternation of every backslash-escaped key.

    The pattern string is assembled from escape_table on every call and
    handed to re.sub uncompiled.
    """
    alternatives = ['\\%s' % char for char in escape_table.keys()]
    pattern = RE_BASE % '|'.join(alternatives)
    return re.sub(pattern, escape, text)
def test2():
    """Separate regex for each key, applied one pass after another.

    NOTE: passes run in dict iteration order. An escaped backslash directly
    followed by another escapable char is only handled as a backslash
    escape if the backslash pass happens to run first.
    """
    result = text
    for char, entity in escape_table.items():
        pattern = RE_BASE % ('\\%s' % char)
        result = re.sub(pattern, entity, result)
    return result
# Same alternation pattern that test1 builds, but compiled a single time at
# module load instead of on each call.
RE = re.compile(RE_BASE % '|'.join('\\%s' % char for char in escape_table))


def test3():  # <== fastest of the complex regex
    """Precompiled variant of test1: substitute using the module-level RE."""
    result = RE.sub(escape, text)
    return result
def escape2(m):
    """Replacement callback (try/except flavour) for the catch-all pattern.

    `m` is a match whose group 1 holds the single char that followed a
    backslash. Returns the escape_table entry for that char, or the whole
    matched text unchanged when the char is not escapable.
    """
    try:
        return escape_table[m.group(1)]
    except KeyError:
        # Returning None here would make re.sub insert an empty string and
        # silently delete the backslash and the char after it.
        return m.group(0)
def test4():
    """Simple catch-all regex; escape2 (try/except) picks each replacement."""
    any_escaped_char = r'\\(.)'
    return re.sub(any_escaped_char, escape2, text)
def escape3(m):
    """Replacement callback (membership-test flavour) for the catch-all pattern.

    `m` is a match whose group 1 holds the single char that followed a
    backslash. Returns the escape_table entry for that char, or the whole
    matched text unchanged when the char is not escapable.
    """
    char = m.group(1)
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if char in escape_table:
        return escape_table[char]
    # Falling off the end would return None, which re.sub treats as an empty
    # replacement, deleting the backslash and the char after it.
    return m.group(0)
def test5():
    """Simple catch-all regex; escape3 (if block) picks each replacement."""
    any_escaped_char = r'\\(.)'
    return re.sub(any_escaped_char, escape3, text)
# Catch-all pattern, compiled once: a backslash followed by any single char
# other than a newline, captured in group 1. Used by test6.
SIMPLE_RE = re.compile(r'\\(.)')
def escape4(m):
    """Replacement callback (dict.get flavour) for the catch-all pattern.

    `m` is a match whose group 1 holds the single char that followed a
    backslash. Returns the escape_table entry for that char, or the whole
    matched text unchanged when the char is not escapable.
    """
    # The explicit default matters: a bare .get() yields None for unknown
    # chars, and re.sub would then replace the match with an empty string.
    return escape_table.get(m.group(1), m.group(0))
def test6():
    """Precompiled catch-all regex; escape4 (dict.get) picks each replacement."""
    result = SIMPLE_RE.sub(escape4, text)
    return result
if __name__ == '__main__':
    from timeit import repeat

    # Number of calls per timing sample.
    n = 10000

    # Label / function pairs, in the order they are reported.
    benchmarks = [
        ("One:", test1),
        ("Precompiled:", test3),
        ("Many:", test2),
        ("SimpleR:", test4),
        ("Simple2:", test5),
        ("SimplePC:", test6),
    ]

    for label, func in benchmarks:
        name = func.__name__
        timings = repeat("%s()" % name,
                         "from __main__ import %s" % name,
                         number=n)
        # Single-argument print calls behave the same on Python 2 and 3.
        print("%s %s" % (label, timings))
        # Show the output of the function that was just timed, so a variant
        # producing different text is visible next to its timings.
        print(func())
        print("")
"""
Output:
One: [0.4685649591499173, 0.4662997493859957, 0.46501345062068733]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#
35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
Precompiled: [0.2757023256979598, 0.2745586563747848, 0.2683399602359111]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#
35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
Many: [1.891783361569178, 1.8785724498354304, 1.8913456742352093]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#
35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
SimpleR: [0.4508724802432633, 0.44857544751307366, 0.44608369772609535]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#
35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
Simple2: [0.42462196381133843, 0.4274122523515942, 0.42898751942258606]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#
35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
SimplePC: [0.30361692646985006, 0.2933059395519013, 0.2931344393764732]
Some text\: with &#42;escaped&#42; characters&#46; &#91;Foo&#93;&#40;bar&#41; &#
35;baz &#43;blah. Who&#92;what? Oh yeah&#33;
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment