Skip to content

Instantly share code, notes, and snippets.

@purpleP
Created Jan 5, 2017
Embed
What would you like to do?
comparing different string search methods
----------------------------------------------------------------------------------- benchmark: 3 tests -----------------------------------------------------------------------------------
Name (time in us) Min Max Mean StdDev Median IQR Outliers(*) Rounds Iterations
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_search[re_compile] 452.1870 (1.0) 1,199.2530 (1.13) 471.1449 (1.0) 56.5338 (1.0) 455.0175 (1.0) 3.9840 (1.0) 120;321 1844 1
test_search[no_compile] 521.2920 (1.15) 1,059.7880 (1.0) 554.4658 (1.18) 92.9603 (1.64) 526.5430 (1.16) 5.2185 (1.31) 28;75 388 1
test_search[endswith] 1,601.1760 (3.54) 3,252.0090 (3.07) 1,691.4124 (3.59) 246.1582 (4.35) 1,604.0630 (3.53) 38.6730 (9.71) 39;102 520 1
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
import random as rnd
from itertools import cycle, product
import re
import pytest
def search_re_no_compile(search_items, strings_to_search):
p = '(' + '|'.join(search_items) + ')$'
return tuple(re.search(p, s) for s in strings_to_search)
def search_re_compile(search_items, strings_to_search):
p = re.compile('(' + '|'.join(search_items) + ')$')
return tuple(p.search(s) for s in strings_to_search)
def search_simple(search_items, strings_to_search):
return tuple(
next((True for si in search_items if s.endswith(si)), False)
for s in strings_to_search
)
search_items = tuple(str(1000 + i) for i in range(100))
random_data = tuple(str(rnd.randint(0, 100000)) * 100 for i in range(10000))
strings_to_search = tuple(
si + rd if should_match else si
for si, rd, should_match in
zip(search_items, random_data, cycle((True, False)))
)
@pytest.mark.parametrize(
'search_func',
(search_re_no_compile, search_re_compile, search_simple),
ids=('no_compile', 're_compile', 'endswith')
)
def test_search(benchmark, search_func):
benchmark(search_func, search_items, strings_to_search)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment