Last active
August 3, 2019 23:43
-
-
Save joseafga/ff798d340d79107ace14fd232abc4376 to your computer and use it in GitHub Desktop.
Python basic storage using file benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
import json | |
import yaml | |
import pickle | |
import csv | |
import time | |
import random | |
import itertools | |
from time import sleep | |
from timeit import timeit | |
from ast import literal_eval | |
# Number of times timeit executes each read/write benchmark statement.
repeat = 100
def random_dict(times=350):
    """Generate a random two-level dictionary used as benchmark payload.

    Args:
        times: Number of top-level items to generate.

    Returns:
        A dict whose keys are random 30-character strings and whose values
        are nested dicts mapping random 30-character keys to random
        30-character values.
    """
    # NOTE(review): the source's indentation was lost; the nesting here (one
    # nested entry per key character, i.e. 30 per item) is reconstructed from
    # the statement order -- confirm against the original script.
    chars = "abcdefghijklmnopqrstuvwxyz0123456789-_"  # possible random chars
    # Word/key length is fixed; an earlier draft drew it from
    # random.randrange(5, 36) instead.
    length = 30

    def random_word():
        # join avoids building the string through repeated concatenation
        return "".join(random.choice(chars) for _ in range(length))

    words = {}
    for _ in range(times):  # number of dict items
        key = random_word()
        words[key] = {random_word(): random_word() for _ in range(length)}

    return words
# WRITE TESTS | |
# ----------- | |
def do_text():
    """Dump the ``str()`` rendering of ``mdict`` into ``dict.txt``."""
    with open("dict.txt", "w") as out:
        rendered = str(mdict)
        out.write(rendered)
def do_json():
    """Encode ``mdict`` as a JSON string, then store it in ``dict.json``."""
    # Encoding happens before the file is opened, so a failed encode leaves
    # any existing file untouched.
    payload = json.dumps(mdict)
    with open("dict.json", "w") as out:
        out.write(payload)
def do_pickle():
    """Pickle ``mdict`` into ``dict.pkl`` using the default protocol."""
    # Pickle is a binary format, hence the "wb" mode.
    with open("dict.pkl", "wb") as out:
        pickle.dump(mdict, out)
def do_csv():
    """Write ``mdict`` to ``dict.csv``, one row per top-level item.

    Each row holds the key followed by its nested dict; the csv writer
    stringifies the nested dict with ``str()`` when writing it.
    """
    # newline="" hands line-ending control to the csv module. Without it,
    # platforms that translate "\n" on write emit "\r\r\n", which shows up
    # as a blank row after every record.
    with open("dict.csv", "w", newline="") as f:
        csv.writer(f).writerows(mdict.items())
def do_yaml():
    """Emit ``mdict`` as YAML into ``dict.yml``."""
    with open("dict.yml", "w") as out:
        # CDumper is used to speed up dumping
        yaml.dump(mdict, stream=out, Dumper=yaml.CDumper)
# READ TESTS | |
# ---------- | |
def read_text():
    """Load ``dict.txt`` and evaluate its contents as a Python literal.

    The parsed value is discarded; only the work of parsing matters here.
    """
    with open("dict.txt", "r") as src:
        raw = src.read()
        literal_eval(raw)
def read_json():
    """Parse the JSON document stored in ``dict.json``.

    The parsed value is discarded; only the work of parsing matters here.
    """
    with open("dict.json", "r") as src:
        # json.load reads the whole file and hands the text to json.loads
        json.load(src)
def read_pickle():
    """Unpickle the object stored in ``dict.pkl``.

    The loaded value is discarded; only the work of loading matters here.
    """
    # Pickle is a binary format, hence the "rb" mode.
    with open("dict.pkl", "rb") as src:
        pickle.load(src)
def read_csv():
    """Parse every row of ``dict.csv`` and discard the result.

    Only the CSV parsing itself is exercised: the rows are not assembled
    into a dict, and the stringified nested values are not converted back.
    """
    # newline="" is what the csv module expects of its input file; without
    # it, newlines embedded in quoted fields are not interpreted correctly.
    with open("dict.csv", "r", newline="") as f:
        for _row in csv.reader(f):
            pass  # consuming the reader is what triggers the parsing
def read_yaml():
    """Parse the YAML document stored in ``dict.yml``.

    The parsed value is discarded; only the work of parsing matters here.
    """
    with open("dict.yml", "r") as src:
        # CLoader is used to speed up loading
        yaml.load(src, Loader=yaml.CLoader)
# Benchmark payload shared by every do_*/read_* function.
mdict = random_dict()

# Show settings information. The item count is the number of top-level
# entries plus the entries of every nested dict, measured from the data
# itself rather than derived from an assumed shape.
print("dictionary: {} items, repeating: {} times".format(
    len(mdict) + sum(len(nested) for nested in mdict.values()), repeat))
if __name__ == "__main__":
    # Time the write and read function of every storage format, then print
    # the timings as a Markdown table with one column per format.
    formats = ("text", "json", "pickle", "csv", "yaml")
    writes = []
    reads = []

    for func in formats:
        # write test (runs before the read test, which reads the same file)
        writes.append(
            timeit(f"do_{func}()", setup=f"from __main__ import do_{func}", number=repeat))

        # read test
        reads.append(
            timeit(f"read_{func}()", setup=f"from __main__ import read_{func}", number=repeat))

        sleep(2)  # pausing between formats seems to improve accuracy

    def _row(label, cells):
        # One table row: the label cell followed by one cell per format.
        return "| " + label + " | " + " | ".join(cells) + " |"

    # Draw the table. Rows are built from `formats`, so the column count
    # always follows the benchmarked formats instead of a fixed five-slot
    # format string (str.format silently drops surplus arguments).
    print(_row("type", [f"{name:^14}" for name in formats]))
    print("|:----- |" + "".join(":" + "-" * 14 + ":|" for _ in formats))
    print(_row("write", [f"{elapsed:^14.10f}" for elapsed in writes]))
    print(_row("read", [f"{elapsed:^14.10f}" for elapsed in reads]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
My results
dictionary: 110 items, repeating: 100 times
dictionary: 10100 items, repeating: 100 times
dictionary: 122850 items, repeating: 100 times
Conclusion
JSON and Pickle are the best options in terms of speed, but a few further considerations are possible:
- Small files: write speeds appear to be the same, but Pickle wins on read speed.
- Medium and larger files: JSON is better on write and Pickle is better on read.
If there are heavy write operations, JSON seems to be the best choice. If there are heavy read operations and/or interoperability is not needed, Pickle wins.