Created
August 28, 2021 20:34
-
-
Save mawillcockson/ea887ff974e22e7012766aa640d488ad to your computer and use it in GitHub Desktop.
Python String Concatenation Timing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from pathlib import Path | |
from itertools import islice | |
from csv import DictWriter | |
from collections import defaultdict | |
def _parse_results(lines):
    """Turn paired header/timing lines into ``{count: {attempt: seconds}}``.

    ``lines`` must alternate between a header of the form
    ``#<attempt> - <count>`` and the ``python -m timeit`` report line that
    belongs to it.

    Raises:
        ValueError: if a header has no timing line, or either line of a
            pair does not have the expected format.
    """
    header_pattern = re.compile(r"^#(?P<attempt>\d+) - (?P<count>\d+)$")
    # timeit formats the value with "%.3g", which can yield a signed exponent
    # (e.g. "1e+03"), and reports in nsec, usec, msec or sec.
    time_pattern = re.compile(
        r": (?P<value>\d+(\.\d+)?(e[+-]?\d+)?) (?P<unit>[num]?sec)\b"
    )
    # Multipliers that convert each reported unit to seconds.
    units = {
        "nsec": 1 / 1_000_000_000,
        "usec": 1 / 1_000_000,
        "msec": 1 / 1_000,
        "sec": 1,
    }

    results = defaultdict(dict)
    for start in range(0, len(lines), 2):
        run = lines[start:start + 2]
        if len(run) < 2:
            raise ValueError(
                f"line {start + 1}: header {run[0]!r} has no timing line"
            )
        header_line, timing_line = run

        header = header_pattern.match(header_line)
        if header is None:
            raise ValueError(
                f"line {start + 1}: expected '#<attempt> - <count>', "
                f"got {header_line!r}"
            )
        timing = time_pattern.search(timing_line)
        if timing is None:
            raise ValueError(
                f"line {start + 2}: no timeit result found in {timing_line!r}"
            )

        seconds = float(timing["value"]) * units[timing["unit"]]
        results[int(header["count"])][int(header["attempt"])] = seconds
    return results


def main():
    """Convert raw ``python -m timeit`` output into a CSV table.

    Reads ``~/projects/time_python_string_concatenation_results.txt`` and
    writes ``~/projects/time_python_string_concatenation_parsed_results.txt``
    with a ``count`` column followed by one column per attempt number; every
    timing is expressed in seconds.

    Raises:
        FileNotFoundError: if the results file does not exist.
        ValueError: if the results file is not made of header/timing pairs.
    """
    results_path = (
        Path("~/projects/time_python_string_concatenation_results.txt")
        .expanduser()
        .resolve(strict=True)
    )
    parse_path = (
        Path("~/projects/time_python_string_concatenation_parsed_results.txt")
        .expanduser()
        .resolve()
    )

    results = _parse_results(results_path.read_text().splitlines())

    # Collect every attempt number seen for any count, in first-seen order,
    # so a count with extra or missing attempts cannot break the header.
    attempts = {}
    for timings in results.values():
        attempts.update(dict.fromkeys(timings))

    # newline="" lets the csv module control line endings itself.
    with parse_path.open(mode="wt", newline="") as file:
        writer = DictWriter(file, fieldnames=["count", *attempts])
        writer.writeheader()
        for count, timings in results.items():
            writer.writerow({"count": count, **timings})


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Time eight approaches to building a str from COUNT copies of a
# one-character string, using `python -m timeit`.
#
# For every COUNT and attempt number this prints a header line
# "#<attempt> - <count>" followed by timeit's report for that attempt.
set -eu

# Setup statement handed to timeit; it creates the objects the attempts use.
# NOTE(review): timeit executes the setup once per timing run and the
# statement many times, so s, l, a and au keep growing between executions of
# a statement -- confirm this is the intended measurement.
SETUP="import array;a=array.array('B');au=array.array('u');s='';l=[];d={};data='a'"

# attempt_code ATTEMPT LOOPS
# Print the Python statement for attempt number ATTEMPT, iterating LOOPS times.
# Each snippet is emitted line by line through a fixed '%s\n' format, so the
# Python source is never interpreted as a printf format string.
# NOTE(review): the final join/convert line of attempts 2, 4, 6, 7 and 8 is
# assumed to run once after the loop, not inside it -- confirm.
attempt_code() {
    case "$1" in
        1)  # repeated in-place +=
            printf '%s\n' \
                "for _ in range($2):" \
                "    s += data"
            ;;
        2)  # append to a list, join once
            printf '%s\n' \
                "for _ in range($2):" \
                "    l.append(data)" \
                "''.join(l)"
            ;;
        3)  # f-string re-formatting
            printf '%s\n' \
                "for _ in range($2):" \
                "    s = f'{s}{data}'"
            ;;
        4)  # dict keyed by loop index, join the values
            printf '%s\n' \
                "for _ in range($2):" \
                "    d[_] = data" \
                "s = ''.join(d.values())"
            ;;
        5)  # join of a two-element tuple each iteration
            printf '%s\n' \
                "for _ in range($2):" \
                "    s = ''.join((s,data))"
            ;;
        6)  # byte array, decoded at the end
            printf '%s\n' \
                "for _ in range($2):" \
                "    a.frombytes(data.encode('utf-8'))" \
                "a.tobytes().decode()"
            ;;
        7)  # unicode array, converted at the end
            printf '%s\n' \
                "for _ in range($2):" \
                "    au.fromunicode(data)" \
                "au.tounicode()"
            ;;
        8)  # dict keyed by the data itself, join the keys
            printf '%s\n' \
                "for _ in range($2):" \
                "    d[data] = 0" \
                "s = ''.join(d)"
            ;;
        *)
            echo "unknown attempt number: $1" >&2
            return 1
            ;;
    esac
}

for count in $(seq 1000 10000 101000); do
    for attempt_number in $(seq 1 1 8); do
        # Plain assignment so that a failure in attempt_code aborts under set -e.
        statement="$(attempt_code "$attempt_number" "$count")"
        echo "#$attempt_number - $count"
        python -m timeit -s "$SETUP" "$statement"
    done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Time several approaches to building a bytes object from COUNT copies of a
# one-byte value, using `python -m timeit`.
#
# For every COUNT and attempt number this prints a header line
# "#<attempt> - <count>" followed by timeit's report for that attempt.
set -eu

# Setup statement handed to timeit; it creates the objects the attempts use.
# NOTE(review): timeit executes the setup once per timing run and the
# statement many times, so s, l, a and au keep growing between executions of
# a statement -- confirm this is the intended measurement.
SETUP="import array;a=array.array('B');au=array.array('u');s=b'';l=[];d={};data=b'a'"

# attempt_code ATTEMPT LOOPS
# Print the Python statement for attempt number ATTEMPT, iterating LOOPS times.
# Each snippet is emitted line by line through a fixed '%s\n' format, so the
# Python source is never interpreted as a printf format string.
# NOTE(review): the final join/convert line of attempts 2, 4, 6, 7 and 8 is
# assumed to run once after the loop, not inside it -- confirm.
attempt_code() {
    case "$1" in
        1)  # repeated in-place +=
            printf '%s\n' \
                "for _ in range($2):" \
                "    s += data"
            ;;
        2)  # append to a list, join once
            printf '%s\n' \
                "for _ in range($2):" \
                "    l.append(data)" \
                "b''.join(l)"
            ;;
        3)  # Placeholder statement that ignores LOOPS; presumably keeps the
            # attempt numbering at 1-8 where no bytes variant exists.
            printf '%s\n' "0"
            ;;
        4)  # dict keyed by loop index, join the values
            printf '%s\n' \
                "for _ in range($2):" \
                "    d[_] = data" \
                "s = b''.join(d.values())"
            ;;
        5)  # join of a two-element tuple each iteration
            printf '%s\n' \
                "for _ in range($2):" \
                "    s = b''.join((s,data))"
            ;;
        6)  # byte array, converted at the end
            printf '%s\n' \
                "for _ in range($2):" \
                "    a.frombytes(data)" \
                "s = a.tobytes()"
            ;;
        7)  # unicode array fed with decoded data, converted at the end
            printf '%s\n' \
                "for _ in range($2):" \
                "    au.fromunicode(data.decode())" \
                "au.tobytes()"
            ;;
        8)  # dict keyed by the data itself, join the keys
            printf '%s\n' \
                "for _ in range($2):" \
                "    d[data] = 0" \
                "s = b''.join(d)"
            ;;
        *)
            echo "unknown attempt number: $1" >&2
            return 1
            ;;
    esac
}

for count in $(seq 1000 10000 101000); do
    for attempt_number in $(seq 1 1 8); do
        # Plain assignment so that a failure in attempt_code aborts under set -e.
        statement="$(attempt_code "$attempt_number" "$count")"
        echo "#$attempt_number - $count"
        python -m timeit -s "$SETUP" "$statement"
    done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Time several approaches to building a bytes object while varying the size
# of the appended chunk: every attempt loops a fixed number of times and
# `data` is COUNT bytes long.
#
# For every COUNT and attempt number this prints a header line
# "#<attempt> - <count>" followed by timeit's report for that attempt.
set -eu

# Number of loop iterations inside every timed statement.
LOOPS=10000

# attempt_code ATTEMPT LOOPS
# Print the Python statement for attempt number ATTEMPT, iterating LOOPS times.
# Each snippet is emitted line by line through a fixed '%s\n' format, so the
# Python source is never interpreted as a printf format string.
# NOTE(review): the final join/convert line of attempts 2, 4, 6, 7 and 8 is
# assumed to run once after the loop, not inside it -- confirm.
attempt_code() {
    case "$1" in
        1)  # repeated in-place +=
            printf '%s\n' \
                "for _ in range($2):" \
                "    s += data"
            ;;
        2)  # append to a list, join once
            printf '%s\n' \
                "for _ in range($2):" \
                "    l.append(data)" \
                "b''.join(l)"
            ;;
        3)  # Placeholder statement that ignores LOOPS; presumably keeps the
            # attempt numbering at 1-8 where no bytes variant exists.
            printf '%s\n' "0"
            ;;
        4)  # dict keyed by loop index, join the values
            printf '%s\n' \
                "for _ in range($2):" \
                "    d[_] = data" \
                "s = b''.join(d.values())"
            ;;
        5)  # join of a two-element tuple each iteration
            printf '%s\n' \
                "for _ in range($2):" \
                "    s = b''.join((s,data))"
            ;;
        6)  # byte array, converted at the end
            printf '%s\n' \
                "for _ in range($2):" \
                "    a.frombytes(data)" \
                "s = a.tobytes()"
            ;;
        7)  # unicode array fed with decoded data, converted at the end
            printf '%s\n' \
                "for _ in range($2):" \
                "    au.fromunicode(data.decode())" \
                "au.tobytes()"
            ;;
        8)  # dict keyed by the data itself, join the keys
            printf '%s\n' \
                "for _ in range($2):" \
                "    d[data] = 0" \
                "s = b''.join(d)"
            ;;
        *)
            echo "unknown attempt number: $1" >&2
            return 1
            ;;
    esac
}

for count in $(seq 10 10 100) $(seq 200 100 1000); do
    # The setup depends on COUNT because it fixes the length of `data`.
    # NOTE(review): timeit executes the setup once per timing run and the
    # statement many times, so s, l, a and au keep growing between executions
    # of a statement -- confirm this is the intended measurement.
    setup="import array;a=array.array('B');au=array.array('u');s=b'';l=[];d={};data=b'a'*$count"
    for attempt_number in $(seq 1 1 8); do
        # Plain assignment so that a failure in attempt_code aborts under set -e.
        statement="$(attempt_code "$attempt_number" "$LOOPS")"
        echo "#$attempt_number - $count"
        python -m timeit -s "$setup" "$statement"
    done
done
What is interesting is that:
list = []
list.append(data)
is slower than
dictionary = {}
dictionary[data] = 0
It makes sense to me how the fastest insertion of all is:
list = [data]
list[0] = data
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
graphs of the results
These kinds of microbenchmarks aren't necessarily useful, but they're fun.
Ultimately the fastest for all scenarios is something like:
In my use case I had a stream of data, so the above wouldn't work.