# forslund/speed_test.py

## speed_test.py
import re
import time

# Noise words that the functions in this file strip out of a phrase.
# Every entry appears exactly once: a repeated entry only costs an extra
# loop pass or regex alternative and never changes a result.
words = [
    'where',
    'which',
    'when',
    'what',
    'that',
    'will',
    'from',
    'also',
    'who',
    'how',
    'did',
    'and',
    'but',
    'the',
    'too',
    'why',
    'is',
    'it',
    'do',
    'or',
    'to',
    'a',
]


def str_replace(phrase):
    """Strip the noise words from ``phrase`` using plain ``str.replace``.

    The phrase is padded with one space on each side so that a word at the
    very start or end is also surrounded by spaces and can be matched as
    ``" word "``. Only words delimited by single spaces are recognised.

    Args:
        phrase: The text to clean up.

    Returns:
        The phrase with every space-delimited occurrence of an entry in
        ``words`` removed and without the padding added for matching.
    """
    phrase = " " + phrase + " "
    for word in words:
        mtch = " " + word + " "
        # str.replace only substitutes non-overlapping matches. In " a a "
        # the second " a " shares a space with the first and is missed by a
        # single pass, so repeat until no match is left. Each pass shortens
        # the phrase, so the loop always terminates.
        while mtch in phrase:
            phrase = phrase.replace(mtch, " ")
    # Drop the padding again so the result matches the other approaches.
    return phrase.strip()


def for_regex(phrase):
    """Strip the noise words from ``phrase`` with one regex pass per word.

    Each entry of ``words`` is wrapped in word-boundary anchors and every
    match is replaced by an empty string. The leftover text is then split on
    whitespace and re-joined with single spaces.

    Args:
        phrase: The text to clean up.

    Returns:
        The remaining words separated by single spaces.
    """
    remaining = phrase
    for noise in words:
        pattern = r'\b' + noise + r'\b'
        remaining = re.sub(pattern, '', remaining)
    tokens = remaining.split()
    return " ".join(tokens)


def single_regex(phrase):
    """Strip the noise words from ``phrase`` with a single alternation regex.

    The pattern is assembled from ``words`` on every call and handed to
    ``re.sub`` as a string. The leftover text is split on whitespace and
    re-joined with single spaces.

    Args:
        phrase: The text to clean up.

    Returns:
        The remaining words separated by single spaces.
    """
    alternatives = '|'.join(words)
    pattern = r'\b(' + alternatives + r')\b'
    tokens = re.sub(pattern, '', phrase).split()
    return ' '.join(tokens)


# Alternation of all noise words between word-boundary anchors, compiled a
# single time when the module is loaded.
REGEX = re.compile(r'\b(' + '|'.join(words) + r')\b')


def precalculated_single_regex(phrase):
    """Strip the noise words from ``phrase`` using the precompiled ``REGEX``.

    Args:
        phrase: The text to clean up.

    Returns:
        The remaining words separated by single spaces.
    """
    without_noise = REGEX.sub('', phrase)
    tokens = without_noise.split()
    return ' '.join(tokens)


def list_join(phrase):
    """Strip the noise words from ``phrase`` by filtering its tokens.

    The phrase is split on whitespace, tokens found in ``words`` are skipped
    and the rest are joined with single spaces in their original order.

    Args:
        phrase: The text to clean up.

    Returns:
        The remaining words separated by single spaces.
    """
    kept = []
    for token in phrase.split():
        if token in words:
            continue
        kept.append(token)
    return ' '.join(kept)


def dont_preserve_order(phrase):
    """Strip the noise words from ``phrase`` without keeping word order.

    The phrase is lower-cased and reduced to the set of its distinct words,
    so repeated words collapse into one and the words come back in whatever
    order the set iterates in.

    Args:
        phrase: The text to clean up.

    Returns:
        The distinct remaining words, lower-cased and separated by single
        spaces.
    """
    # remove noise to produce essence
    # split() without an argument never yields empty strings. Splitting a
    # space-padded phrase on ' ' did, and that empty token could end up in
    # the middle of the join, leaving a double space strip() cannot remove.
    essence = set(phrase.lower().split()) - set(words)
    return ' '.join(essence)


# Time every approach on the same phrase and print "<function name> <seconds>".
# The measurements differ only in the function under test, so they are driven
# from a single loop instead of one copy of the timing code per function.
for approach in (
    str_replace,
    for_regex,
    single_regex,
    precalculated_single_regex,
    list_join,
    dont_preserve_order,
):
    # perf_counter is the time-module clock meant for measuring short
    # intervals; only the difference between two readings is meaningful.
    start = time.perf_counter()
    for _ in range(100000):
        approach("his name is andy and he is cool")
    stop = time.perf_counter()
    print(approach.__name__, stop - start)
	import re
	import time

	# Noise words that the functions in this file strip out of a phrase.
	# Every entry appears exactly once: a repeated entry only costs an extra
	# loop pass or regex alternative and never changes a result.
	words = [
	    'where',
	    'which',
	    'when',
	    'what',
	    'that',
	    'will',
	    'from',
	    'also',
	    'who',
	    'how',
	    'did',
	    'and',
	    'but',
	    'the',
	    'too',
	    'why',
	    'is',
	    'it',
	    'do',
	    'or',
	    'to',
	    'a',
	]


	def str_replace(phrase):
	    """Strip the noise words from ``phrase`` using plain ``str.replace``.

	    The phrase is padded with one space on each side so that a word at
	    the very start or end can also be matched as ``" word "``. Only words
	    delimited by single spaces are recognised.

	    Args:
	        phrase: The text to clean up.

	    Returns:
	        The phrase with every space-delimited occurrence of an entry in
	        ``words`` removed and without the padding added for matching.
	    """
	    phrase = " " + phrase + " "
	    for word in words:
	        mtch = " " + word + " "
	        # str.replace only substitutes non-overlapping matches, so a
	        # consecutively repeated word (" a a ") needs more than one pass.
	        # Each pass shortens the phrase, so the loop always terminates.
	        while mtch in phrase:
	            phrase = phrase.replace(mtch, " ")
	    # Drop the padding again so the result matches the other approaches.
	    return phrase.strip()


	def for_regex(phrase):
	    """Strip the noise words from ``phrase`` with one regex pass per word.

	    Each entry of ``words`` is wrapped in word-boundary anchors and every
	    match is replaced by an empty string. The leftover text is split on
	    whitespace and re-joined with single spaces.

	    Args:
	        phrase: The text to clean up.

	    Returns:
	        The remaining words separated by single spaces.
	    """
	    for word in words:
	        phrase = re.sub(r'\b' + word + r'\b', '', phrase)
	    return " ".join(phrase.split())


	def single_regex(phrase):
	    """Strip the noise words from ``phrase`` with a single alternation regex.

	    The pattern is assembled from ``words`` on every call. The leftover
	    text is split on whitespace and re-joined with single spaces.

	    Args:
	        phrase: The text to clean up.

	    Returns:
	        The remaining words separated by single spaces.
	    """
	    # The separator must be a bare '|'. Joining with '\|' puts an escaped
	    # pipe into the pattern, which matches a literal "|" character instead
	    # of separating the alternatives, so no noise word is ever removed.
	    regex = r'\b(' + '|'.join(words) + r')\b'
	    result = re.sub(regex, '', phrase)
	    return ' '.join(result.split())


	# Alternation of all noise words between word-boundary anchors, compiled
	# a single time when the module is loaded. The separator must be a bare
	# '|': an escaped pipe would be matched as a literal "|" character.
	REGEX = re.compile(r'\b(' + '|'.join(words) + r')\b')


	def precalculated_single_regex(phrase):
	    """Strip the noise words from ``phrase`` using the precompiled ``REGEX``.

	    Args:
	        phrase: The text to clean up.

	    Returns:
	        The remaining words separated by single spaces.
	    """
	    result = REGEX.sub('', phrase)
	    return ' '.join(result.split())


	def list_join(phrase):
	    """Strip the noise words from ``phrase`` by filtering its tokens.

	    The phrase is split on whitespace, tokens found in ``words`` are
	    dropped and the rest are joined with single spaces in their original
	    order.

	    Args:
	        phrase: The text to clean up.

	    Returns:
	        The remaining words separated by single spaces.
	    """
	    return ' '.join(w for w in phrase.split() if w not in words)


	def dont_preserve_order(phrase):
	    """Strip the noise words from ``phrase`` without keeping word order.

	    The phrase is lower-cased and reduced to the set of its distinct
	    words, so repeated words collapse into one and the words come back in
	    whatever order the set iterates in.

	    Args:
	        phrase: The text to clean up.

	    Returns:
	        The distinct remaining words, lower-cased and separated by single
	        spaces.
	    """
	    # remove noise to produce essence
	    # split() without an argument never yields empty strings. Splitting a
	    # space-padded phrase on ' ' did, and that empty token could end up in
	    # the middle of the join, leaving a double space strip() cannot remove.
	    result_set = set(phrase.lower().split()) - set(words)
	    return ' '.join(result_set)


	# Time every approach on the same phrase and print
	# "<function name> <seconds>". The measurements differ only in the
	# function under test, so they are driven from a single loop.
	for approach in (
	    str_replace,
	    for_regex,
	    single_regex,
	    precalculated_single_regex,
	    list_join,
	    dont_preserve_order,
	):
	    # perf_counter is the time-module clock meant for measuring short
	    # intervals; only the difference between two readings is meaningful.
	    start = time.perf_counter()
	    for _ in range(100000):
	        approach("his name is andy and he is cool")
	    stop = time.perf_counter()
	    print(approach.__name__, stop - start)