Skip to content

Instantly share code, notes, and snippets.

@bootandy
Created July 18, 2012 21:11
Show Gist options
  • Save bootandy/3138930 to your computer and use it in GitHub Desktop.
Save bootandy/3138930 to your computer and use it in GitHub Desktop.
python string split speed
import time
import re
def load():
    """Return the full contents of ``data.txt`` as a single string.

    The path is relative to the current working directory.  The file is
    opened in a ``with`` block so the handle is closed as soon as the read
    finishes, including when ``read()`` raises.
    """
    with open('data.txt', "r") as file_to_open:
        return file_to_open.read()
def print_me(s):
    """Write *s* to standard output followed by a newline.

    Kept as a named function so it can be handed to ``map`` or called in a
    loop when dumping the words while debugging.
    """
    # With a single argument the parenthesised form behaves the same as the
    # Python 2 print statement and is also valid on Python 3.
    print(s)
def capitalize_first(s):
    """Return *s* with its first character upper-cased and the rest lower-cased.

    Both pieces are taken by slicing, so an empty string simply yields an
    empty string instead of raising.
    """
    head = s[:1]
    tail = s[1:]
    return head.upper() + tail.lower()
def split_by_array_use_map(big):
    """Time splitting *big* on single spaces and capitalizing via ``map``.

    Prints the function name with the elapsed seconds, then the number of
    pieces produced.  Returns nothing.
    """
    start = time.time()
    arr = big.split(" ")
    # list() forces the mapping to run inside the timed region: on Python 3
    # map() is lazy and would otherwise do no work before `end` is taken.
    arr2 = list(map(capitalize_first, arr))
    end = time.time()
    print(split_by_array_use_map.__name__ + ' : ' + str(end - start))
    print(len(arr2))
    # Debugging aid: uncomment to print every piece.
    # for piece in arr: print_me(piece)
def split_by_array_use_loop(big):
    """Time splitting *big* on single spaces and capitalizing in a loop.

    Prints the function name with the elapsed seconds, then the number of
    pieces produced.  Returns nothing.
    """
    start = time.time()
    arr = big.split(" ")
    arr2 = []
    # The explicit append loop is deliberate: it is the variant being
    # measured against the map-based version.
    for s in arr:
        arr2.append(capitalize_first(s))
    end = time.time()
    print(split_by_array_use_loop.__name__ + ' : ' + str(end - start))
    print(len(arr2))
    # Debugging aid: uncomment to print every piece.
    # for piece in arr: print_me(piece)
def split_by_indexof(big):
    """Time a hand-rolled split that peels off one word at a time with ``find``.

    Each pass takes the text up to the first space, capitalizes it, then
    drops that word, the space, and any whitespace that follows.  Prints the
    function name with the elapsed seconds, then the number of words
    collected.  Returns nothing.
    """
    start = time.time()
    arr = []
    # A trailing space guarantees find() also succeeds for the last word.
    big += " "
    while big:
        # Search once and reuse the index for both slices.
        space_at = big.find(" ")
        arr.append(capitalize_first(big[:space_at]))
        # lstrip() skips any further whitespace before the next word; the
        # slice copies the remainder of the string on every pass.
        big = big[space_at + 1:].lstrip()
    end = time.time()
    print(split_by_indexof.__name__ + ' : ' + str(end - start))
    print(len(arr))
    # Debugging aid: uncomment to print every word.
    # for word in arr: print_me(word)
def split_by_regex(big):
    """Time extracting words by repeatedly matching a regex at the string start.

    Each pass matches optional non-word characters followed by one word,
    capitalizes the word, and cuts the matched text off the front of the
    string.  Stops when the string is empty or no word remains.  Prints the
    function name with the elapsed seconds, then the number of words
    collected.  Returns nothing.
    """
    start = time.time()
    arr = []
    # Compile once instead of handing the pattern string to re on every pass.
    leading_word = re.compile(r'^\W*(\w+)')
    while big:
        match = leading_word.match(big)
        if match is None:
            break
        arr.append(capitalize_first(match.group(1)))
        # Advance past the WHOLE match.  Cutting only len(word) characters
        # ignores the skipped non-word prefix, so the tail of a word is seen
        # again (e.g. "hello world" produced Hello, World, D).
        big = big[match.end():]
    end = time.time()
    print(split_by_regex.__name__ + ' : ' + str(end - start))
    print(len(arr))
    # Debugging aid: uncomment to print every word.
    # for word in arr: print_me(word)
def split_by_regex_find_all(big):
    """Time extracting every word with one ``re.findall`` call, then capitalizing.

    Prints the function name with the elapsed seconds, then the number of
    words found.  Returns nothing.
    """
    start = time.time()
    matches = re.findall(r'\W*(\w+)', big)
    # list() makes the result sized and forces the mapping to run inside the
    # timed region; on Python 3 a bare map object is lazy and has no len().
    arr2 = list(map(capitalize_first, matches))
    end = time.time()
    print(split_by_regex_find_all.__name__ + ' : ' + str(end - start))
    print(len(arr2))
def main():
    """Load ``data.txt`` once and run every split benchmark on the same text."""
    big = load()
    split_by_array_use_loop(big)
    split_by_array_use_map(big)
    split_by_indexof(big)
    split_by_regex(big)
    # The recorded results list this benchmark too, so run it with the rest.
    split_by_regex_find_all(big)


# Run the benchmarks only when executed as a script, not on import.
if __name__ == '__main__':
    main()
# Results: elapsed seconds printed by each benchmark function.
#split_by_array_use_loop : 0.152453184128
#split_by_array_use_map : 0.136634111404
#split_by_indexof : 28.3557610512
#split_by_regex : 132.38497901
#split_by_regex_find_all : 0.226699113846
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment