Benchmark of basic file-based storage in Python
#! /usr/bin/env python3
import json
import yaml
import pickle
import csv
import time
import random
import itertools
from time import sleep
from timeit import timeit
from ast import literal_eval

# set repeat times
repeat = 100


def random_dict(times=350):
    """ Generate a random dictionary """
    chars = "abcdefghijklmnopqrstuvfz0123456789-_"  # possible random chars
    words = {}

    for _ in itertools.repeat(None, times):  # number of dict items
        key = ''
        word = {}

        # word/key length (min, max)
        # for _ in itertools.repeat(None, random.randrange(5, 36)):
        for _ in itertools.repeat(None, 30):
            key += random.choice(chars)
            k = w = ''

            # nested
            # for _ in itertools.repeat(None, random.randrange(5, 36)):
            for _ in itertools.repeat(None, 30):
                k += random.choice(chars)
                w += random.choice(chars)

            word[k] = w

        words[key] = word

    return words
# WRITE TESTS
# -----------
def do_text():
    """ Save dict as plain text """
    with open("dict.txt", "w") as f:
        f.write(str(mdict))


def do_json():
    """ Serialize dict as JSON """
    jsonify = json.dumps(mdict)
    with open("dict.json", "w") as f:
        f.write(jsonify)


def do_pickle():
    """ Serialize dict as Python serialization format """
    with open("dict.pkl", "wb") as f:
        pickle.dump(mdict, f)


def do_csv():
    """ Serialize dict as CSV data """
    with open("dict.csv", "w") as f:
        w = csv.writer(f)
        for key, val in mdict.items():
            w.writerow([key, val])


def do_yaml():
    """ Serialize dict as YAML """
    with open("dict.yml", "w") as f:
        yaml.dump(mdict, f, Dumper=yaml.CDumper)  # use CDumper to speed up
# READ TESTS
# ----------
def read_text():
    """ Read plain text to dict """
    with open("dict.txt", "r") as f:
        literal_eval(f.read())


def read_json():
    """ Deserialize JSON to dict """
    with open("dict.json", "r") as f:
        json.loads(f.read())


def read_pickle():
    """ Deserialize Python serialization format to dict """
    with open("dict.pkl", "rb") as f:
        pickle.load(f)


def read_csv():
    """ Deserialize CSV data to dict """
    with open("dict.csv", "r") as f:
        r = csv.reader(f)
        for row in r:
            # print("{0[0]}: {0[1]}".format(row))
            pass


def read_yaml():
    """ Deserialize YAML to dict """
    with open("dict.yml", "r") as f:
        yaml.load(f, yaml.CLoader)  # use CLoader to speed up
# set dictionary
mdict = random_dict()

# show settings information
print("dictionary: {} items, repeating: {} times".format(
    len(mdict)**2 + len(mdict), repeat))

if __name__ == "__main__":
    headers = []
    writes = []
    reads = []

    for func in ("text", "json", "pickle", "csv", "yaml"):
        # set function name to table header
        headers.append(func)
        # write test
        writes.append(
            timeit(f"do_{func}()", setup=f"from __main__ import do_{func}", number=repeat))
        # read test
        reads.append(
            timeit(f"read_{func}()", setup=f"from __main__ import read_{func}", number=repeat))
        sleep(2)  # seems to improve accuracy

    # draw table
    print(
        "| type | {:^14} | {:^14} | {:^14} | {:^14} | {:^14} |".format(*headers))
    print(
        "|:{0:{0}^5} |:{0:{0}^14}:|:{0:{0}^14}:|:{0:{0}^14}:|:{0:{0}^14}:|:{0:{0}^14}:|".format('-'))
    print(
        "| write | {:^14.10f} | {:^14.10f} | {:^14.10f} | {:^14.10f} | {:^14.10f} |".format(*writes))
    print(
        "| read | {:^14.10f} | {:^14.10f} | {:^14.10f} | {:^14.10f} | {:^14.10f} |".format(*reads))
@joseafga (Author)

My results

Times are in seconds, as returned by `timeit` (total time over all repetitions).
dictionary: 110 items, repeating: 100 times

| type  |     text     |     json     |    pickle    |     csv      |     yaml     |
|:------|:------------:|:------------:|:------------:|:------------:|:------------:|
| write | 0.0375659640 | 0.0282231770 | 0.0283864960 | 0.0792802430 | 0.4417921260 |
| read  | 0.1226409720 | 0.0149921170 | 0.0104380230 | 0.0254502670 | 0.4692441650 |

dictionary: 10100 items, repeating: 100 times

| type  |     text     |     json     |    pickle    |     csv      |     yaml     |
|:------|:------------:|:------------:|:------------:|:------------:|:------------:|
| write | 0.2090152320 | 0.1540276060 | 0.2187984460 | 0.6661074660 | 4.2863682450 |
| read  | 1.3025133310 | 0.1226977360 | 0.0888515250 | 0.2275468410 | 4.7532115050 |

dictionary: 122850 items, repeating: 100 times

| type  |     text     |     json     |    pickle     |     csv      |     yaml      |
|:------|:------------:|:------------:|:-------------:|:------------:|:-------------:|
| write | 0.7211231260 | 0.4885269330 | 0.5673695680  | 2.3524030680 | 15.2600903070 |
| read  | 4.6809786060 | 0.4550038220 | 0.3325577080  | 0.7827057010 | 16.6697322850 |
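
To reproduce the three runs above, the dictionary size has to be changed by hand. Assuming the printed item count follows the script's `len(mdict)**2 + len(mdict)` formula, the three runs correspond to these `random_dict()` arguments (an inference for reproduction, the exact calls are not shown in the gist):

```python
# assumed sizes behind the three result tables, inferred from the printed counts
# (times**2 + times): 10 -> 110, 100 -> 10100, 350 -> 122850
mdict = random_dict(10)    # "dictionary: 110 items"
mdict = random_dict(100)   # "dictionary: 10100 items"
mdict = random_dict(350)   # "dictionary: 122850 items" (the script default)
```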

Conclusion

JSON and Pickle are the best options in terms of speed, but a few more observations are worth making:

- For small files, write speeds are roughly the same, but Pickle wins on read speed.
- For medium and large files, JSON is faster on write and Pickle is faster on read.

If the workload is write-heavy, JSON seems to be the best choice. If it is read-heavy and/or interoperability is not needed, Pickle comes out ahead.
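
As a rough sketch of that trade-off (not part of the benchmark itself; the helper names and the `interoperable` flag are made up for illustration), a project could pick the format per use case:

```python
import json
import pickle


def save_data(data, path, interoperable=False):
    """Hypothetical helper: JSON when other tools must read the file, pickle for pure-Python speed."""
    if interoperable:
        with open(path, "w") as f:
            json.dump(data, f)
    else:
        with open(path, "wb") as f:
            pickle.dump(data, f)


def load_data(path, interoperable=False):
    """Hypothetical counterpart to save_data()."""
    if interoperable:
        with open(path, "r") as f:
            return json.load(f)
    with open(path, "rb") as f:
        return pickle.load(f)
```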
