Skip to content

Instantly share code, notes, and snippets.

@farrokhi
Last active August 6, 2018 15:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save farrokhi/74d2fe2d4705a2556289c3cc9f76d20f to your computer and use it in GitHub Desktop.
Save farrokhi/74d2fe2d4705a2556289c3cc9f76d20f to your computer and use it in GitHub Desktop.
Comparing the performance of various string concatenation methods in Python
from functools import wraps
import pickle
from random import randint, choice
import string
import pathlib
import time
import sys
# Shared benchmark data: a list of tuples of strings. It is filled by
# build_list() or replaced by load_list(), and read by every test_* function.
big_list = [] # our huge list of tuples
# Path of the pickle file in which build_list() stores the generated records
# and from which load_list() reads them back.
cache_file = "test-data-cache.pickle"
def chrono(msg='', format_time=False):
    """Build a decorator that times each call of the function it wraps.

    A new ``Chrono`` is created immediately before the wrapped function is
    called and its ``stop`` method is invoked right after the function
    returns, receiving ``msg`` and ``format_time`` unchanged.

    Args:
        msg: Label handed to ``Chrono.stop`` together with the timing.
        format_time: Flag handed to ``Chrono.stop`` to select its time format.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns.
    """
    def decorate(func):
        @wraps(func)
        def timed(*args, **kwargs):
            timer = Chrono()
            result = func(*args, **kwargs)
            # Reached only when func returns normally; if func raises,
            # the exception propagates and nothing is reported.
            timer.stop(msg, format_time)
            return result

        return timed

    return decorate
class Chrono:
    """Simple stopwatch built on ``time.perf_counter``.

    The clock starts when the instance is created. ``stop`` does not freeze
    anything: every call measures the time elapsed since construction.
    """

    def __init__(self):
        # Reference point against which stop() measures.
        self._start_time = time.perf_counter()

    @staticmethod
    def _format_time(t) -> str:
        """Render a duration of ``t`` seconds as ``HHhMMmSSs``.

        Every field is formatted with ``%02d``, so fractional parts are
        dropped and each field is zero-padded to at least two digits.
        """
        hours = t // 3600
        minutes = (t % 3600) // 60
        seconds = t - (hours * 3600) - (minutes * 60)
        return "%02dh%02dm%02ds" % (hours, minutes, seconds)

    def stop(self, msg='', format_time=False) -> float:
        """Return the seconds elapsed since construction, optionally reporting them.

        Args:
            msg: When non-empty, a line containing the elapsed time and this
                label is printed to ``sys.stderr``.
            format_time: When true, the printed time uses ``_format_time``;
                otherwise it is printed as seconds with three decimals.

        Returns:
            The elapsed time in seconds.
        """
        elapsed = time.perf_counter() - self._start_time
        if not msg:
            # Nothing to report; just hand back the measurement.
            return elapsed
        shown = self._format_time(elapsed) if format_time else "%.3f seconds" % elapsed
        print("[ elapsed %s ] - %s" % (shown, msg), file=sys.stderr)
        return elapsed
def rand_str():
    """Return a random string of 6 to 18 ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    # The length is drawn first, then one character per position.
    length = randint(6, 18)
    return "".join([choice(alphabet) for _ in range(length)])
def build_list(n):
    """Append ``n`` random records to the global ``big_list`` and cache the list.

    Each record is a tuple of 20 strings: five random fields (two from
    ``rand_str`` and three stringified random integers) repeated four times.
    After generation, the whole ``big_list`` is pickled to ``cache_file``.
    Both phases are timed with ``Chrono`` and reported on stderr.

    Args:
        n: Number of records to generate.
    """
    global big_list
    c = Chrono()
    for _ in range(n):
        tpl = (
            rand_str(),
            str(randint(0, 655359999)),
            str(randint(0, 65535999)),
            str(randint(0, 655359999)),
            rand_str(),
        )
        big_list.append(tpl * 4)  # each record has 20 fields
    # Report the count requested by the caller; the function must not depend
    # on a script-level variable that exists only when run as __main__.
    c.stop(f"built list of {n} items")

    # save for later
    # The context manager closes the file even if pickling fails.
    with open(cache_file, 'wb') as dumpfile:
        c = Chrono()
        pickle.dump(big_list, dumpfile)
        c.stop("dump cache file")
def load_list():
    """Replace the global ``big_list`` with the records pickled in ``cache_file``.

    The load is timed with ``Chrono`` and the record count is reported on
    stderr.

    Raises:
        OSError: If ``cache_file`` cannot be opened for reading.
    """
    global big_list
    # NOTE: pickle.load can execute arbitrary code contained in the file.
    # This is acceptable only because the cache is written by build_list();
    # never point cache_file at data from an untrusted source.
    # The context manager closes the file even if unpickling fails.
    with open(cache_file, 'rb') as dumpfile:
        c = Chrono()
        big_list = pickle.load(dumpfile)
        c.stop(f"load cache file : {len(big_list)} records")
@chrono('str.join()')
def test_join():
    """Benchmark ``str.join``: build one pipe-delimited line per record.

    Iterates over the global ``big_list`` and joins all fields of each
    record with ``"|"``. The run is timed by the ``chrono`` decorator.

    Returns:
        The line built for the last record in ``big_list``.
    """
    lines = []
    for record in big_list:
        line = "|".join(record)
        lines.append(line)
    return lines[-1]
@chrono("percent formatting")
def test_percent_format():
    """Benchmark ``%`` formatting: build one pipe-delimited line per record.

    Iterates over the global ``big_list`` and formats the first 20 fields of
    each record with a ``%s`` template. The run is timed by the ``chrono``
    decorator.

    Returns:
        The line built for the last record in ``big_list``.
    """
    lines = []
    for record in big_list:
        line = "%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s" % (
            record[0], record[1], record[2], record[3], record[4],
            record[5], record[6], record[7], record[8], record[9],
            record[10], record[11], record[12], record[13], record[14],
            record[15], record[16], record[17], record[18], record[19],
        )
        lines.append(line)
    return lines[-1]
@chrono("f-string formatting")
def test_fstring_format():
    """Benchmark f-strings: build one pipe-delimited line per record.

    Iterates over the global ``big_list`` and interpolates the first 20
    fields of each record into an f-string. The run is timed by the
    ``chrono`` decorator.

    Returns:
        The line built for the last record in ``big_list``.
    """
    lines = []
    for item in big_list:
        # Adjacent f-string literals are concatenated into a single string.
        line = (
            f"{item[0]}|{item[1]}|{item[2]}|{item[3]}|{item[4]}|{item[5]}|{item[6]}|{item[7]}|"
            f"{item[8]}|{item[9]}|{item[10]}|{item[11]}|{item[12]}|{item[13]}|{item[14]}|{item[15]}|"
            f"{item[16]}|{item[17]}|{item[18]}|{item[19]}"
        )
        lines.append(line)
    return lines[-1]
@chrono("multiple string concat (+)")
def test_string_concat():
    """Benchmark chained ``+``: build one pipe-delimited line per record.

    Iterates over the global ``big_list`` and concatenates the first 20
    fields of each record with ``"|"`` separators using ``+``. The run is
    timed by the ``chrono`` decorator.

    Returns:
        The line built for the last record in ``big_list``.
    """
    lines = []
    for record in big_list:
        line = (
            record[0] + "|" + record[1] + "|" + record[2] + "|" + record[3] + "|"
            + record[4] + "|" + record[5] + "|" + record[6] + "|" + record[7] + "|"
            + record[8] + "|" + record[9] + "|" + record[10] + "|" + record[11] + "|"
            + record[12] + "|" + record[13] + "|" + record[14] + "|" + record[15] + "|"
            + record[16] + "|" + record[17] + "|" + record[18] + "|" + record[19]
        )
        lines.append(line)
    return lines[-1]
@chrono("str.format()")
def test_format_function():
    """Benchmark ``str.format``: build one pipe-delimited line per record.

    Iterates over the global ``big_list`` and passes the first 20 fields of
    each record as positional arguments to a numbered ``str.format``
    template. The run is timed by the ``chrono`` decorator.

    Returns:
        The line built for the last record in ``big_list``.
    """
    lines = []
    for record in big_list:
        line = "{0}|{1}|{2}|{3}|{4}|{5}|{6}|{7}|{8}|{9}|{10}|{11}|{12}|{13}|{14}|{15}|{16}|{17}|{18}|{19}".format(
            record[0], record[1], record[2], record[3], record[4],
            record[5], record[6], record[7], record[8], record[9],
            record[10], record[11], record[12], record[13], record[14],
            record[15], record[16], record[17], record[18], record[19],
        )
        lines.append(line)
    return lines[-1]
if __name__ == '__main__':
    # Script entry point: prepare the data set, run every benchmark once and
    # check that all of them produced the same final line.
    p = pathlib.Path(cache_file)
    list_size = 500000

    # Reuse the cached data set when it exists and has the expected size;
    # otherwise generate it (again).
    if p.exists():
        load_list()
        if len(big_list) != list_size:
            print("... rebuilding the list")
            big_list = []
            build_list(list_size)
    else:
        build_list(list_size)

    v1 = test_join()
    v2 = test_percent_format()
    v3 = test_fstring_format()
    v4 = test_string_concat()
    v5 = test_format_function()

    success = v1 == v2 == v3 == v4 == v5
    if success:
        print("Success: All functions generated the same results")
        # sys.exit is used instead of exit(): the latter is a helper injected
        # by the site module and is not guaranteed to exist.
        sys.exit(0)
    else:
        print("Error: Output results are not the same:")
        # A plain loop: printing is a side effect, not list construction.
        for v in (v1, v2, v3, v4, v5):
            print(v)
        sys.exit(1)
@farrokhi
Copy link
Author

farrokhi commented Aug 5, 2018

Results on FreeBSD 11.2 x64 with a Xeon E5-2683 v3 @ 2.00GHz:

# python3.6 str-perf-test.py                                                                                                                                            
[ elapsed 9.055 seconds ] - load cache file : 5000000 records
[ elapsed 8.712 seconds ] - str.join()
[ elapsed 16.945 seconds ] - percent formatting
[ elapsed 13.030 seconds ] - f-string formatting
[ elapsed 27.914 seconds ] - multiple string concat (+)
[ elapsed 24.126 seconds ] - str.format()
Success: All functions generated the same results

 # python3.7 str-perf-test.py                                                                                                                                            
[ elapsed 9.195 seconds ] - load cache file : 5000000 records
[ elapsed 8.281 seconds ] - str.join()
[ elapsed 17.174 seconds ] - percent formatting
[ elapsed 13.154 seconds ] - f-string formatting
[ elapsed 29.295 seconds ] - multiple string concat (+)
[ elapsed 23.174 seconds ] - str.format()
Success: All functions generated the same results

# pypy3 str-perf-test.py                                                                                                                                                
[ elapsed 87.455 seconds ] - load cache file : 5000000 records
[ elapsed 11.441 seconds ] - str.join()
[ elapsed 17.707 seconds ] - percent formatting
[ elapsed 10.026 seconds ] - f-string formatting
[ elapsed 9.080 seconds ] - multiple string concat (+)
[ elapsed 12.864 seconds ] - str.format()
Success: All functions generated the same results

@farrokhi
Copy link
Author

farrokhi commented Aug 6, 2018

Results on FreeBSD 11.2 x64 with a Xeon CPU E5-4620 0 @ 2.20GHz:

# python3.6 str-perf-test.py
[ elapsed 11.126 seconds ] - load cache file : 5000000 records
[ elapsed 7.999 seconds ] - str.join()
[ elapsed 18.283 seconds ] - percent formatting
[ elapsed 13.043 seconds ] - f-string formatting
[ elapsed 29.705 seconds ] - multiple string concat (+)
[ elapsed 25.400 seconds ] - str.format()
Success: All functions generated the same results

# python3.7 str-perf-test.py                                                                                                                                                                                                                                                                    
[ elapsed 10.912 seconds ] - load cache file : 5000000 records
[ elapsed 7.898 seconds ] - str.join()
[ elapsed 18.089 seconds ] - percent formatting
[ elapsed 13.005 seconds ] - f-string formatting
[ elapsed 29.910 seconds ] - multiple string concat (+)
[ elapsed 25.110 seconds ] - str.format()
Success: All functions generated the same results

# pypy3 str-perf-test.py
[ elapsed 121.232 seconds ] - load cache file : 5000000 records
[ elapsed 15.889 seconds ] - str.join()
[ elapsed 24.872 seconds ] - percent formatting
[ elapsed 16.148 seconds ] - f-string formatting
[ elapsed 14.568 seconds ] - multiple string concat (+)
[ elapsed 17.107 seconds ] - str.format()
Success: All functions generated the same results

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment