# bootandy/string_split_speed.py

## string_split_speed.py
import time
import re

def load():
    """Read the benchmark corpus from ``data.txt`` and return it as one string.

    The path is relative to the current working directory and the file is
    opened in text mode.

    Returns:
        The complete contents of ``data.txt``.
    """
    # The context manager closes the handle even if read() raises; previously
    # the file stayed open until the object happened to be garbage collected.
    with open('data.txt', "r") as file_to_open:
        return file_to_open.read()

def print_me(s):
    """Write *s* to standard output followed by a newline."""
    # The parenthesised single-argument form behaves like the Python 2 print
    # statement and is also valid on Python 3, where ``print s`` is a
    # SyntaxError.
    print(s)

def capitalize_first(s):
    """Return *s* with its first character upper-cased and the rest lower-cased.

    An empty string is returned unchanged.
    """
    head, tail = s[:1], s[1:]
    return head.upper() + tail.lower()

def split_by_array_use_map(big):
    """Time ``big.split(" ")`` followed by capitalising every piece via ``map``.

    Prints ``"split_by_array_use_map : <elapsed seconds>"`` and then the number
    of pieces produced by the split.  Returns ``None``.

    Args:
        big: the text to split.
    """
    start = time.time()
    arr = big.split(" ")

    # list() keeps the work inside the timed region: on Python 3 ``map`` is
    # lazy, so without it nothing would be capitalised before ``end`` is taken.
    arr2 = list(map(capitalize_first, arr))
    end = time.time()
    print(split_by_array_use_map.__name__ + ' : ' + str(end - start))
    print(len(arr))
    # Debug aid: uncomment to dump every word.
    # for word in arr: print_me(word)

def split_by_array_use_loop(big):
    """Time ``big.split(" ")`` followed by capitalising every piece in a loop.

    Prints ``"split_by_array_use_loop : <elapsed seconds>"`` and then the
    number of pieces produced by the split.  Returns ``None``.

    Args:
        big: the text to split.
    """
    start = time.time()
    arr = big.split(" ")
    arr2 = []
    # Deliberately an explicit append loop: this function exists to be
    # compared against the ``map`` variant, so it is not rewritten as a
    # comprehension.
    for s in arr:
        arr2.append(capitalize_first(s))
    end = time.time()
    print(split_by_array_use_loop.__name__ + ' : ' + str(end - start))
    print(len(arr))
    # Debug aid: uncomment to dump every word.
    # for word in arr: print_me(word)

def split_by_indexof(big):
    """Time splitting *big* by repeatedly searching for the next space.

    Each iteration takes the text up to the first space, capitalises it, and
    re-slices the remainder.  Leading whitespace of the remainder is stripped,
    so runs of whitespace between words do not yield empty pieces.

    Prints ``"split_by_indexof : <elapsed seconds>"`` and then the number of
    pieces collected.  Returns ``None``.

    Args:
        big: the text to split.
    """
    start = time.time()
    arr = []
    # Hack to ensure a space on the end as we split by space; it guarantees
    # find() succeeds for as long as any text remains.
    big += " "

    while len(big) > 0:
        # Look the separator up once and reuse the index for both slices.
        cut = big.find(" ")
        arr.append(capitalize_first(big[0:cut]))
        big = big[cut + 1:].lstrip()

    end = time.time()
    print(split_by_indexof.__name__ + ' : ' + str(end - start))
    print(len(arr))
    # Debug aid: uncomment to dump every word.
    # for word in arr: print_me(word)

def split_by_regex(big):
    """Time extracting words from *big* by matching a regex at the front.

    Each iteration matches optional non-word characters followed by one word
    at the start of the remaining text, capitalises the word, and drops the
    matched prefix.  The loop stops when no word characters remain.

    Prints ``"split_by_regex : <elapsed seconds>"`` and then the number of
    words collected.  Returns ``None``.

    Args:
        big: the text to split.
    """
    start = time.time()
    arr = []
    # Compiled once instead of looking the pattern up on every iteration.
    leading_word = re.compile(r'^\W*(\w+)')

    while len(big) > 0:
        match = leading_word.match(big)
        if match is None:
            # Only non-word characters are left.
            break
        arr.append(capitalize_first(match.group(1)))
        # Drop everything the match consumed, separators included.  Advancing
        # by len(word) alone ignored the skipped separators, so the tail of
        # the current word was matched again as a bogus extra word
        # ("hello world" produced Hello, World, D).
        big = big[match.end():]

    end = time.time()
    print(split_by_regex.__name__ + ' : ' + str(end - start))
    print(len(arr))
    # Debug aid: uncomment to dump every word.
    # for word in arr: print_me(word)

def split_by_regex_find_all(big):
    """Time extracting all words from *big* with a single ``re.findall`` call.

    Prints ``"split_by_regex_find_all : <elapsed seconds>"`` and then the
    number of capitalised words.  Returns ``None``.

    Args:
        big: the text to split.
    """
    start = time.time()
    matches = re.findall(r'\W*(\w+)', big)
    # list() is required on Python 3, where ``map`` returns a lazy iterator:
    # it keeps the capitalisation inside the timed region and makes the
    # len() call below valid.
    arr2 = list(map(capitalize_first, matches))
    end = time.time()
    print(split_by_regex_find_all.__name__ + ' : ' + str(end - start))
    print(len(arr2))

def main():
    """Load the corpus once and run every split benchmark on it in turn."""
    big = load()
    split_by_array_use_loop(big)
    split_by_array_use_map(big)
    split_by_indexof(big)
    split_by_regex(big)
    # Previously never invoked, although the recorded results in this file
    # include a timing for it.
    split_by_regex_find_all(big)


# Run the benchmarks only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

#results:
#split_by_array_use_loop : 0.152453184128
#split_by_array_use_map : 0.136634111404
#split_by_indexof : 28.3557610512
#split_by_regex : 132.38497901
#split_by_regex_find_all : 0.226699113846
	import time
	import re

	def load():
		"""Read the benchmark corpus from ``data.txt`` and return it as one string.

		The path is relative to the current working directory and the file is
		opened in text mode.

		Returns:
			The complete contents of ``data.txt``.
		"""
		# Body re-indented under the def (it had been flattened to the same
		# level).  The context manager closes the handle even if read() raises.
		with open('data.txt', "r") as file_to_open:
			return file_to_open.read()

	def print_me(s):
		"""Write *s* to standard output followed by a newline."""
		# Body re-indented under the def.  The parenthesised single-argument
		# form behaves like the Python 2 print statement and is also valid on
		# Python 3.
		print(s)

	def capitalize_first(s):
		"""Return *s* with its first character upper-cased and the rest lower-cased.

		An empty string is returned unchanged.
		"""
		# Body re-indented under the def (it had been flattened to the same
		# level, which is an IndentationError).
		return s[0:1].upper() + s[1:].lower()

	def split_by_array_use_map(big):
		"""Time ``big.split(" ")`` followed by capitalising every piece via ``map``.

		Prints ``"split_by_array_use_map : <elapsed seconds>"`` and then the
		number of pieces produced by the split.  Returns ``None``.

		Args:
			big: the text to split.
		"""
		start = time.time()
		arr = big.split(" ")

		# list() keeps the work inside the timed region: on Python 3 ``map``
		# is lazy, so without it nothing would be capitalised before ``end``
		# is taken.
		arr2 = list(map(capitalize_first, arr))
		end = time.time()
		print(split_by_array_use_map.__name__ + ' : ' + str(end - start))
		print(len(arr))
		# Debug aid: uncomment to dump every word.
		# for word in arr: print_me(word)

	def split_by_array_use_loop(big):
		"""Time ``big.split(" ")`` followed by capitalising every piece in a loop.

		Prints ``"split_by_array_use_loop : <elapsed seconds>"`` and then the
		number of pieces produced by the split.  Returns ``None``.

		Args:
			big: the text to split.
		"""
		start = time.time()
		arr = big.split(" ")
		arr2 = []
		# Deliberately an explicit append loop: this function exists to be
		# compared against the ``map`` variant.
		for s in arr:
			arr2.append(capitalize_first(s))
		end = time.time()
		print(split_by_array_use_loop.__name__ + ' : ' + str(end - start))
		print(len(arr))
		# Debug aid: uncomment to dump every word.
		# for word in arr: print_me(word)

	def split_by_indexof(big):
		"""Time splitting *big* by repeatedly searching for the next space.

		Each iteration takes the text up to the first space, capitalises it,
		and re-slices the remainder.  Leading whitespace of the remainder is
		stripped, so runs of whitespace between words do not yield empty
		pieces.

		Prints ``"split_by_indexof : <elapsed seconds>"`` and then the number
		of pieces collected.  Returns ``None``.

		Args:
			big: the text to split.
		"""
		start = time.time()
		arr = []
		# Hack to ensure a space on the end as we split by space; it
		# guarantees find() succeeds for as long as any text remains.
		big += " "

		while len(big) > 0:
			# Look the separator up once and reuse the index for both slices.
			cut = big.find(" ")
			arr.append(capitalize_first(big[0:cut]))
			big = big[cut + 1:].lstrip()

		end = time.time()
		print(split_by_indexof.__name__ + ' : ' + str(end - start))
		print(len(arr))
		# Debug aid: uncomment to dump every word.
		# for word in arr: print_me(word)

	def split_by_regex(big):
		"""Time extracting words from *big* by matching a regex at the front.

		Each iteration matches optional non-word characters followed by one
		word at the start of the remaining text, capitalises the word, and
		drops the matched prefix.  The loop stops when no word characters
		remain.

		Prints ``"split_by_regex : <elapsed seconds>"`` and then the number of
		words collected.  Returns ``None``.

		Args:
			big: the text to split.
		"""
		start = time.time()
		arr = []
		# Compiled once instead of looking the pattern up on every iteration.
		leading_word = re.compile(r'^\W*(\w+)')

		while len(big) > 0:
			match = leading_word.match(big)
			if match is None:
				# Only non-word characters are left.
				break
			arr.append(capitalize_first(match.group(1)))
			# Drop everything the match consumed, separators included.
			# Advancing by len(word) alone ignored the skipped separators, so
			# the tail of the current word was matched again as a bogus extra
			# word ("hello world" produced Hello, World, D).
			big = big[match.end():]

		end = time.time()
		print(split_by_regex.__name__ + ' : ' + str(end - start))
		print(len(arr))
		# Debug aid: uncomment to dump every word.
		# for word in arr: print_me(word)

	def split_by_regex_find_all(big):
		"""Time extracting all words from *big* with a single ``re.findall`` call.

		Prints ``"split_by_regex_find_all : <elapsed seconds>"`` and then the
		number of capitalised words.  Returns ``None``.

		Args:
			big: the text to split.
		"""
		start = time.time()
		matches = re.findall(r'\W*(\w+)', big)
		# list() is required on Python 3, where ``map`` returns a lazy
		# iterator: it keeps the capitalisation inside the timed region and
		# makes the len() call below valid.
		arr2 = list(map(capitalize_first, matches))
		end = time.time()
		print(split_by_regex_find_all.__name__ + ' : ' + str(end - start))
		print(len(arr2))

	def main():
		"""Load the corpus once and run every split benchmark on it in turn."""
		big = load()
		split_by_array_use_loop(big)
		split_by_array_use_map(big)
		split_by_indexof(big)
		split_by_regex(big)
		# Previously never invoked, although the recorded results in this
		# file include a timing for it.
		split_by_regex_find_all(big)


	# Run the benchmarks only when this file is executed as a script, not when
	# it is imported as a module.  The call is re-indented under the guard; it
	# had been flattened to the same level as the ``if``.
	if __name__ == '__main__':
		main()

	#results:
	#split_by_array_use_loop : 0.152453184128
	#split_by_array_use_map : 0.136634111404
	#split_by_indexof : 28.3557610512
	#split_by_regex : 132.38497901
	#split_by_regex_find_all : 0.226699113846