Skip to content

Instantly share code, notes, and snippets.

@kindly
Last active May 25, 2021 22:42
Show Gist options
  • Save kindly/0a6e48829efe022e0b4e9a4008d7d8a2 to your computer and use it in GitHub Desktop.
Save kindly/0a6e48829efe022e0b4e9a4008d7d8a2 to your computer and use it in GitHub Desktop.
##############################################################################
#
# Simple Python program to benchmark several Python Excel writing modules.
#
# python bench_excel_writers.py [num_rows] [num_cols]
#
# Copyright 2013-2018, John McNamara, Charlie Clark
#
import os
import sys
try:
from time import process_time
except ImportError:
from time import clock as process_time
from random import randint, sample
from string import ascii_letters
import openpyxl
import xlsxwriter
# Default to 1 sheet with 1000 rows x 50 cols
# 10 % strings
row_max = 1000
col_max = 50
sheets = 5
factor = 2
if len(sys.argv) > 1:
row_max = int(sys.argv[1])
if len(sys.argv) > 2:
col_max = int(sys.argv[2])
if len(sys.argv) > 3:
sheets = int(sys.argv[3])
if len(sys.argv) > 4:
factor = float(sys.argv[4])
def random_string():
"""
Return a random set of letters
"""
chars = sample(ascii_letters, randint(3, 12))
return "".join(chars)
strings = []
for r in range(int(sheets * row_max * factor)):
strings.append([random_string() for c in range(col_max)])
def print_elapsed_time(module_name, elapsed, optimised=False):
""" Print module run times in a consistent format. """
if optimised:
module_name += " (optimised)"
print(" %-22s: %6.2f" % (module_name, elapsed))
def time_xlsxwriter(optimised=False):
""" Run XlsxWriter in optimised/constant memory mode. """
options = {}
filename = 'xlsxwriter.xlsx'
module_name = "xlsxwriter"
if optimised:
options['constant_memory'] = True
filename = "xlsxwriter_opt.xlsx"
start_time = process_time()
string_rows = iter(strings)
workbook = xlsxwriter.Workbook(filename,
options=options)
worksheets = []
for r in range(sheets):
worksheet = workbook.add_worksheet()
worksheets.append(worksheet)
for row in range(row_max):
if not row % 1:
data = next(string_rows)
else:
data = [row + col for col in range(col_max)]
worksheet.write_row(row, 0, data)
string_rows = iter(strings)
for worksheet in worksheets:
for row in range(row_max):
if not row % 1:
data = next(string_rows)
else:
data = [row + col for col in range(col_max)]
worksheet.write_row(row+1000, 0, data)
workbook.close()
elapsed = process_time() - start_time
print_elapsed_time(module_name, elapsed, optimised)
#os.remove(filename)
def time_openpyxl(optimised=False):
""" Run OpenPyXL in default mode. """
module_name = "openpyxl"
filename = 'openpyxl.xlsx'
if optimised:
filename = 'openpyxl_opt.xlsx'
start_time = process_time()
string_rows = iter(strings)
workbook = openpyxl.Workbook(write_only=optimised)
worksheets = []
for r in range(sheets):
worksheet = workbook.create_sheet()
worksheets.append(worksheet)
for row in range(row_max):
if not row % 1:
data = next(string_rows)
else:
data = (row + col for col in range(col_max))
worksheet.append(data)
string_rows = iter(strings)
for worksheet in worksheets:
for row in range(row_max):
if not row % 1:
data = next(string_rows)
else:
data = [row + col for col in range(col_max)]
worksheet.append(data)
workbook.save(filename)
elapsed = process_time() - start_time
print_elapsed_time(module_name, elapsed, optimised)
#os.remove(filename)
print("")
print("Versions:")
print("%s: %s" % ('python', sys.version[:5]))
print("%s: %s" % ('openpyxl', openpyxl.__version__))
print("%s: %s" % ('xlsxwriter', xlsxwriter.__version__))
print("")
print("Dimensions:")
print(" Rows = %d" % row_max)
print(" Cols = %d" % col_max)
print(" Sheets = %d" % sheets)
print(" Proportion text = %0.2f" % factor)
print("")
print("Times:")
time_xlsxwriter()
time_xlsxwriter(optimised=True)
time_openpyxl()
time_openpyxl(optimised=True)
print("")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment