Benchmark of basic file-based storage in Python
#! /usr/bin/env python3
import json
import yaml
import pickle
import csv
import time
import random
import itertools
from time import sleep
from timeit import timeit
from ast import literal_eval

# set repeat times
repeat = 100


def random_dict(times=350):
    """ Generate a random dictionary """
    chars = "abcdefghijklmnopqrstuvfz0123456789-_"  # possible random chars
    words = {}

    for _ in itertools.repeat(None, times):  # number of dict items
        key = ''
        word = {}

        # word/key length (min, max)
        # for _ in itertools.repeat(None, random.randrange(5, 36)):
        for _ in itertools.repeat(None, 30):
            key += random.choice(chars)
            k = w = ''

            # nested
            # for _ in itertools.repeat(None, random.randrange(5, 36)):
            for _ in itertools.repeat(None, 30):
                k += random.choice(chars)
                w += random.choice(chars)

            word[k] = w

        words[key] = word

    return words
# WRITE TESTS
# -----------
def do_text():
    """ Save dict as plain text """
    with open("dict.txt", "w") as f:
        f.write(str(mdict))


def do_json():
    """ Serialize dict as JSON """
    jsonify = json.dumps(mdict)
    with open("dict.json", "w") as f:
        f.write(jsonify)


def do_pickle():
    """ Serialize dict as Python serialization format """
    with open("dict.pkl", "wb") as f:
        pickle.dump(mdict, f)


def do_csv():
    """ Serialize dict as CSV data """
    with open("dict.csv", "w") as f:
        w = csv.writer(f)
        for key, val in mdict.items():
            w.writerow([key, val])


def do_yaml():
    """ Serialize dict as YAML """
    with open("dict.yml", "w") as f:
        yaml.dump(mdict, f, Dumper=yaml.CDumper)  # use CDumper to speed up
# READ TESTS
# ----------
def read_text():
    """ Read plain text to dict """
    with open("dict.txt", "r") as f:
        literal_eval(f.read())


def read_json():
    """ Deserialize JSON to dict """
    with open("dict.json", "r") as f:
        json.loads(f.read())


def read_pickle():
    """ Deserialize Python serialization format to dict """
    with open("dict.pkl", "rb") as f:
        pickle.load(f)


def read_csv():
    """ Deserialize CSV data to dict """
    with open("dict.csv", "r") as f:
        r = csv.reader(f)
        for row in r:
            # print("{0[0]}: {0[1]}".format(row))
            pass


def read_yaml():
    """ Deserialize YAML to dict """
    with open("dict.yml", "r") as f:
        yaml.load(f, yaml.CLoader)  # use CLoader to speed up
# set dictionary
mdict = random_dict()

# show settings information
print("dictionary: {} items, repeating: {} times".format(
    len(mdict)**2 + len(mdict), repeat))

if __name__ == "__main__":
    headers = []
    writes = []
    reads = []

    for func in ("text", "json", "pickle", "csv", "yaml"):
        # set function name to table header
        headers.append(func)
        # write test
        writes.append(
            timeit(f"do_{func}()", setup=f"from __main__ import do_{func}", number=repeat))
        # read test
        reads.append(
            timeit(f"read_{func}()", setup=f"from __main__ import read_{func}", number=repeat))
        sleep(2)  # seems to improve accuracy

    # draw table
    print(
        "| type | {:^14} | {:^14} | {:^14} | {:^14} | {:^14} |".format(*headers))
    print(
        "|:{0:{0}^5} |:{0:{0}^14}:|:{0:{0}^14}:|:{0:{0}^14}:|:{0:{0}^14}:|:{0:{0}^14}:|".format('-'))
    print(
        "| write | {:^14.10f} | {:^14.10f} | {:^14.10f} | {:^14.10f} | {:^14.10f} |".format(*writes))
    print(
        "| read | {:^14.10f} | {:^14.10f} | {:^14.10f} | {:^14.10f} | {:^14.10f} |".format(*reads))
@joseafga (Author)

My results

Times are in seconds, as returned by `timeit` (total time over all repetitions).
dictionary: 110 items, repeating: 100 times

| type  |     text     |     json     |    pickle    |     csv      |     yaml     |
|:------|:------------:|:------------:|:------------:|:------------:|:------------:|
| write | 0.0375659640 | 0.0282231770 | 0.0283864960 | 0.0792802430 | 0.4417921260 |
| read  | 0.1226409720 | 0.0149921170 | 0.0104380230 | 0.0254502670 | 0.4692441650 |

dictionary: 10100 items, repeating: 100 times

| type  |     text     |     json     |    pickle    |     csv      |     yaml     |
|:------|:------------:|:------------:|:------------:|:------------:|:------------:|
| write | 0.2090152320 | 0.1540276060 | 0.2187984460 | 0.6661074660 | 4.2863682450 |
| read  | 1.3025133310 | 0.1226977360 | 0.0888515250 | 0.2275468410 | 4.7532115050 |

dictionary: 122850 items, repeating: 100 times

| type  |     text     |     json     |    pickle     |     csv      |     yaml      |
|:------|:------------:|:------------:|:-------------:|:------------:|:-------------:|
| write | 0.7211231260 | 0.4885269330 | 0.5673695680  | 2.3524030680 | 15.2600903070 |
| read  | 4.6809786060 | 0.4550038220 | 0.3325577080  | 0.7827057010 | 16.6697322850 |
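
To reproduce the three runs above, the dictionary size has to be changed by hand. Assuming the printed item count follows the script's `len(mdict)**2 + len(mdict)` formula, the three runs correspond to these `random_dict()` arguments (an inference for reproduction, the exact calls are not shown in the gist):

```python
# assumed sizes behind the three result tables, inferred from the printed counts
# (times**2 + times): 10 -> 110, 100 -> 10100, 350 -> 122850
mdict = random_dict(10)    # "dictionary: 110 items"
mdict = random_dict(100)   # "dictionary: 10100 items"
mdict = random_dict(350)   # "dictionary: 122850 items" (the script default)
```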

Conclusion

JSON and Pickle are the best options in terms of speed, but a few more observations are worth making:

- For small files, write speeds are roughly the same, but Pickle wins on read speed.
- For medium and large files, JSON is faster on write and Pickle is faster on read.

If the workload is write-heavy, JSON seems to be the best choice. If it is read-heavy and/or interoperability is not needed, Pickle comes out ahead.
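
As a rough sketch of that trade-off (not part of the benchmark itself; the helper names and the `interoperable` flag are made up for illustration), a project could pick the format per use case:

```python
import json
import pickle


def save_data(data, path, interoperable=False):
    """Hypothetical helper: JSON when other tools must read the file, pickle for pure-Python speed."""
    if interoperable:
        with open(path, "w") as f:
            json.dump(data, f)
    else:
        with open(path, "wb") as f:
            pickle.dump(data, f)


def load_data(path, interoperable=False):
    """Hypothetical counterpart to save_data()."""
    if interoperable:
        with open(path, "r") as f:
            return json.load(f)
    with open(path, "rb") as f:
        return pickle.load(f)
```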
