Skip to content

Instantly share code, notes, and snippets.

@acdha
Forked from jmcnamara/bench_excel_writers.py
Last active April 28, 2018 04:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save acdha/4976b9831224d2713997 to your computer and use it in GitHub Desktop.
Save acdha/4976b9831224d2713997 to your computer and use it in GitHub Desktop.
Benchmark of several Python Excel writing modules
Versions:
python : 2.7.1
openpyxl : 2.2.5
pyexcelerate: 0.6.6
xlsxwriter : 0.7.3
xlwt : 1.0.0
Dimensions:
Rows = 10000
Cols = 50
Times:
pyexcelerate : 4.14
xlwt : 6.09
xlsxwriter (optimised): 7.58
xlsxwriter : 8.99
openpyxl (optimised): 10.55
openpyxl : 14.06
# encoding: utf-8
##############################################################################
#
# Simple Python program to benchmark several Python Excel writing modules.
#
# python bench_excel_writers.py [num_rows] [num_cols]
#
# Run with --unicode to measure performance with non-ASCII text
import sys
from time import clock
import openpyxl
import pyexcelerate
import xlsxwriter
import xlwt
from openpyxl.cell import get_column_letter
# This allows testing whether Unicode processing imposes extra overhead:
UNICODE_CELL_TEMPLATE = u"\u23F1 Row: %d Col: %d"
ASCII_CELL_TEMPLATE = "Row: %d Col: %d"
if '--unicode' in sys.argv:
sys.argv.remove('--unicode')
CELL_TEMPLATE = UNICODE_CELL_TEMPLATE
else:
CELL_TEMPLATE = ASCII_CELL_TEMPLATE
# Default to 1000 rows x 50 cols.
if len(sys.argv) > 1:
row_max = int(sys.argv[1])
col_max = 50
else:
row_max = 1000
col_max = 50
if len(sys.argv) > 2:
col_max = int(sys.argv[2])
def print_elapsed_time(module_name, elapsed):
""" Print module run times in a consistent format. """
print(" %-22s: %6.2f" % (module_name, elapsed))
def time_xlsxwriter():
""" Run XlsxWriter in default mode. """
start_time = clock()
workbook = xlsxwriter.Workbook('xlsxwriter.xlsx')
worksheet = workbook.add_worksheet()
for row in range(row_max // 2):
for col in range(col_max):
worksheet.write_string(row * 2, col, CELL_TEMPLATE % (row, col))
for col in range(col_max):
worksheet.write_number(row * 2 + 1, col, row + col)
workbook.close()
elapsed = clock() - start_time
print_elapsed_time('xlsxwriter', elapsed)
def time_xlsxwriter_optimised():
""" Run XlsxWriter in optimised/constant memory mode. """
start_time = clock()
workbook = xlsxwriter.Workbook('xlsxwriter_opt.xlsx',
{'constant_memory': True})
worksheet = workbook.add_worksheet()
for row in range(row_max // 2):
for col in range(col_max):
worksheet.write_string(row * 2, col, CELL_TEMPLATE % (row, col))
for col in range(col_max):
worksheet.write_number(row * 2 + 1, col, row + col)
workbook.close()
elapsed = clock() - start_time
print_elapsed_time('xlsxwriter (optimised)', elapsed)
def time_openpyxl():
""" Run OpenPyXL in default mode. """
start_time = clock()
workbook = openpyxl.workbook.Workbook()
worksheet = workbook.active
for row in range(row_max // 2):
for col in range(col_max):
colletter = get_column_letter(col + 1)
worksheet.cell('%s%s' % (colletter, row * 2 + 1)).value = CELL_TEMPLATE % (row, col)
for col in range(col_max):
colletter = get_column_letter(col + 1)
worksheet.cell('%s%s' % (colletter, row * 2 + 2)).value = row + col
workbook.save('openpyxl.xlsx')
elapsed = clock() - start_time
print_elapsed_time('openpyxl', elapsed)
def time_openpyxl_optimised():
""" Run OpenPyXL in optimised mode. """
start_time = clock()
workbook = openpyxl.workbook.Workbook(optimized_write=True)
worksheet = workbook.create_sheet()
for row in range(row_max // 2):
string_data = [CELL_TEMPLATE % (row, col) for col in range(col_max)]
worksheet.append(string_data)
num_data = [row + col for col in range(col_max)]
worksheet.append(num_data)
workbook.save('openpyxl_opt.xlsx')
elapsed = clock() - start_time
print_elapsed_time('openpyxl (optimised)', elapsed)
def time_pyexcelerate():
""" Run pyexcelerate in "faster" mode. """
start_time = clock()
workbook = pyexcelerate.Workbook()
worksheet = workbook.new_sheet('Sheet1')
for row in range(row_max // 2):
for col in range(col_max):
worksheet.set_cell_value(row * 2 + 1, col + 1, CELL_TEMPLATE % (row, col))
for col in range(col_max):
worksheet.set_cell_value(row * 2 + 2, col + 1, row + col)
workbook.save('pyexcelerate.xlsx')
elapsed = clock() - start_time
print_elapsed_time('pyexcelerate', elapsed)
def time_xlwt():
""" Run xlwt in default mode. """
start_time = clock()
workbook = xlwt.Workbook()
worksheet = workbook.add_sheet('Sheet1')
for row in range(row_max // 2):
for col in range(col_max):
worksheet.write(row * 2, col, CELL_TEMPLATE % (row, col))
for col in range(col_max):
worksheet.write(row * 2 + 1, col, row + col)
workbook.save('xlwt.xls')
elapsed = clock() - start_time
print_elapsed_time('xlwt', elapsed)
print("")
print("Versions:")
print(" %-12s: %s" % ('python', sys.version[:5]))
print(" %-12s: %s" % ('openpyxl', openpyxl.__version__))
print(" %-12s: %s" % ('pyexcelerate', pyexcelerate.__version__))
print(" %-12s: %s" % ('xlsxwriter', xlsxwriter.__version__))
print(" %-12s: %s" % ('xlwt', xlwt.__VERSION__))
print("")
print("Dimensions:")
print(" Rows = %d" % row_max)
print(" Cols = %d" % col_max)
print("")
print("Times:")
time_pyexcelerate()
time_xlwt()
time_xlsxwriter_optimised()
time_xlsxwriter()
time_openpyxl_optimised()
time_openpyxl()
print("")
Versions:
python : 2.7.2
openpyxl : 2.2.1
pyexcelerate: 0.6.6
xlsxwriter : 0.7.2
xlwt : 1.0.0
Dimensions:
Rows = 10000
Cols = 50
Times:
pyexcelerate : 10.63
xlwt : 16.93
xlsxwriter (optimised): 20.37
xlsxwriter : 24.24
openpyxl (optimised): 26.63
openpyxl : 35.75
Versions:
python : 2.7.1
openpyxl : 2.2.5
pyexcelerate: 0.6.6
xlsxwriter : 0.7.3
xlwt : 1.0.0
Dimensions:
Rows = 10000
Cols = 50
Times:
pyexcelerate : 4.37
xlwt : 6.65
xlsxwriter (optimised): 7.75
xlsxwriter : 9.05
openpyxl (optimised): 10.07
openpyxl : 13.77
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment