Skip to content

Instantly share code, notes, and snippets.

@clokep
Last active August 10, 2020 17:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save clokep/20c7cf34006099120bea5bbbb1c76c97 to your computer and use it in GitHub Desktop.
Save clokep/20c7cf34006099120bea5bbbb1c76c97 to your computer and use it in GitHub Desktop.
Benchmark: UTF-8 encoding many small strings individually vs. joining them and encoding one large string
#!/usr/bin/env python
import json
import timeit
# Number of times timeit calls the benchmark per timing measurement (passed
# as ``number`` to ``timeit.repeat`` in ``run_benchmarks``).
N_RUNS = 2000000
# Number of timing measurements taken per benchmark (passed as ``repeat``).
# Only the fastest measurement is reported.
N_REPEATS = 5
def benchmark_encode_once(objs):
    """Join each object's str chunks, then UTF-8 encode the result once.

    Args:
        objs: An iterable of objects, each an iterable of ``str`` chunks.

    Returns:
        None. The encoded bytes are discarded; only the work matters.
    """
    for obj in objs:
        # One str join followed by a single encode of the whole string.
        "".join(obj).encode("utf-8")
def benchmark_encode_each(objs):
    """UTF-8 encode every chunk of each object, then join the bytes.

    Args:
        objs: An iterable of objects, each an iterable of chunks that
            provide an ``encode`` method (``str`` in this script).

    Returns:
        None. The joined bytes are discarded; only the work matters.
    """
    for chunks in objs:
        # A list (not a generator) is built on purpose: it is part of the
        # strategy being measured.
        encoded_chunks = [chunk.encode("utf-8") for chunk in chunks]
        b"".join(encoded_chunks)
def run_benchmarks():
    """Load the sample JSON data and time both encoding strategies.

    Reads ``data/large.json`` (one JSON document) and
    ``data/one-json-per-line.txt`` (one JSON document per line), turns each
    document into the list of ``str`` chunks produced by
    ``json.JSONEncoder.iterencode``, and times ``benchmark_encode_once`` and
    ``benchmark_encode_each`` over both data sets.

    For every benchmark a single warm-up call is timed and printed, followed
    by ``N_REPEATS`` measurements of ``N_RUNS`` calls each; the fastest
    measurement is reported.

    Raises:
        OSError: If either data file cannot be opened.
        json.JSONDecodeError: If a data file contains invalid JSON.
    """
    encoder = json.JSONEncoder()

    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open('data/large.json', encoding='utf-8') as f:
        large_obj_data = f.read()
    large_obj = list(encoder.iterencode(json.loads(large_obj_data)))

    with open('data/one-json-per-line.txt', encoding='utf-8') as f:
        small_objs_data = f.readlines()
    # Parse each line before re-encoding it, exactly as for the large object.
    # Passing the raw line to iterencode would yield a single JSON *string*
    # chunk rather than the chunks of the object it describes. Blank lines
    # (e.g. a trailing newline at end of file) are skipped.
    small_objs = [
        list(encoder.iterencode(json.loads(line)))
        for line in small_objs_data
        if line.strip()
    ]

    benchmarks = [
        ('encode once (large obj)', lambda: benchmark_encode_once([large_obj])),
        ('encode once (small objs)', lambda: benchmark_encode_once(small_objs)),
        ('encode each (large obj)', lambda: benchmark_encode_each([large_obj])),
        ('encode each (small objs)', lambda: benchmark_encode_each(small_objs)),
    ]

    print('Running benchmarks...')
    for benchmark_name, fn in benchmarks:
        print(' %s...' % benchmark_name)
        # Pass the callable itself: wrapping it as ``lambda: fn`` would only
        # return the function object and never execute the benchmark.
        first_run = timeit.timeit(fn, number=1)
        print(' first run: %f' % first_run)
        times = timeit.repeat(fn, number=N_RUNS, repeat=N_REPEATS)
        best = min(times)
        print(' %i loops, best of %i: %f sec per loop (best total %f)' % (
            N_RUNS, len(times), best / N_RUNS, best
        ))
# Run the benchmarks only when executed as a script, not when imported.
if __name__ == '__main__':
    run_benchmarks()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment