Skip to content

Instantly share code, notes, and snippets.

@Themanwithoutaplan
Forked from jmcnamara/bench_excel_writers.py
Last active August 29, 2015 14:16
Show Gist options
  • Save Themanwithoutaplan/164a8d005c2ed3b9c3bf to your computer and use it in GitHub Desktop.
Save Themanwithoutaplan/164a8d005c2ed3b9c3bf to your computer and use it in GitHub Desktop.
##############################################################################
#
# Simple Python program to benchmark several Python Excel writing modules.
#
# python bench_excel_writers.py [num_rows] [num_cols]
#
#
import sys
from time import clock
import openpyxl
import pyexcelerate
import xlsxwriter
import xlwt
from openpyxl.cell import get_column_letter
# Default to 1000 rows x 50 cols.
if len(sys.argv) > 1:
row_max = int(sys.argv[1])
col_max = 50
else:
row_max = 1000
col_max = 50
if len(sys.argv) > 2:
col_max = int(sys.argv[2])
def print_elapsed_time(module_name, elapsed):
""" Print module run times in a consistent format. """
print(" %-22s: %6.2f" % (module_name, elapsed))
def time_xlsxwriter():
""" Run XlsxWriter in default mode. """
start_time = clock()
workbook = xlsxwriter.Workbook('xlsxwriter.xlsx')
worksheet = workbook.add_worksheet()
for row in range(row_max // 2):
for col in range(col_max):
worksheet.write_string(row * 2, col, "Row: %d Col: %d" % (row, col))
for col in range(col_max):
worksheet.write_number(row * 2 + 1, col, row + col)
workbook.close()
elapsed = clock() - start_time
print_elapsed_time('xlsxwriter', elapsed)
def time_xlsxwriter_optimised():
""" Run XlsxWriter in optimised/constant memory mode. """
start_time = clock()
workbook = xlsxwriter.Workbook('xlsxwriter_opt.xlsx',
{'constant_memory': True})
worksheet = workbook.add_worksheet()
for row in range(row_max // 2):
for col in range(col_max):
worksheet.write_string(row * 2, col, "Row: %d Col: %d" % (row, col))
for col in range(col_max):
worksheet.write_number(row * 2 + 1, col, row + col)
workbook.close()
elapsed = clock() - start_time
print_elapsed_time('xlsxwriter (optimised)', elapsed)
def time_openpyxl():
""" Run OpenPyXL in default mode. """
start_time = clock()
workbook = openpyxl.workbook.Workbook()
worksheet = workbook.active
for row in range(row_max // 2):
string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
worksheet.append(string_data)
num_data = [row + col for col in range(col_max)]
worksheet.append(num_data)
workbook.save('openpyxl.xlsx')
elapsed = clock() - start_time
print_elapsed_time('openpyxl', elapsed)
def time_openpyxl_optimised():
""" Run OpenPyXL in optimised mode. """
start_time = clock()
workbook = openpyxl.workbook.Workbook(optimized_write=True)
worksheet = workbook.create_sheet()
for row in range(row_max // 2):
string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
worksheet.append(string_data)
num_data = [row + col for col in range(col_max)]
worksheet.append(num_data)
workbook.save('openpyxl_opt.xlsx')
elapsed = clock() - start_time
print_elapsed_time('openpyxl (optimised)', elapsed)
def time_pyexcelerate():
""" Run pyexcelerate in "faster" mode. """
start_time = clock()
workbook = pyexcelerate.Workbook()
worksheet = workbook.new_sheet('Sheet1')
for row in range(row_max // 2):
for col in range(col_max):
worksheet.set_cell_value(row * 2 + 1, col + 1, "Row: %d Col: %d" % (row, col))
for col in range(col_max):
worksheet.set_cell_value(row * 2 + 2, col + 1, row + col)
workbook.save('pyexcelerate.xlsx')
elapsed = clock() - start_time
print_elapsed_time('pyexcelerate', elapsed)
def time_xlwt():
""" Run xlwt in default mode. """
start_time = clock()
workbook = xlwt.Workbook()
worksheet = workbook.add_sheet('Sheet1')
for row in range(row_max // 2):
for col in range(col_max):
worksheet.write(row * 2, col, "Row: %d Col: %d" % (row, col))
for col in range(col_max):
worksheet.write(row * 2 + 1, col, row + col)
workbook.save('xlwt.xls')
elapsed = clock() - start_time
print_elapsed_time('xlwt', elapsed)
print("")
print("Versions:")
print(" %-12s: %s" % ('python', sys.version[:5]))
print(" %-12s: %s" % ('openpyxl', openpyxl.__version__))
print(" %-12s: %s" % ('pyexcelerate', pyexcelerate.__version__))
print(" %-12s: %s" % ('xlsxwriter', xlsxwriter.__version__))
print(" %-12s: %s" % ('xlwt', xlwt.__VERSION__))
print("")
print("Dimensions:")
print(" Rows = %d" % row_max)
print(" Cols = %d" % col_max)
print("")
print("Times:")
time_pyexcelerate()
if col_max > 256:
print("More than 256 columns, skipping xlwt")
else:
time_xlwt()
time_xlsxwriter_optimised()
time_xlsxwriter()
time_openpyxl_optimised()
time_openpyxl()
print("")
Versions:
python : 2.7.9
openpyxl : 2.3.0
pyexcelerate: 0.6.7
xlsxwriter : 0.6.6
xlwt : 0.7.5
Dimensions:
Rows = 10000
Cols = 50
Times:
pyexcelerate : 11.28
xlwt : 14.03
xlsxwriter (optimised): 18.25
xlsxwriter : 22.10
openpyxl (optimised): 24.98
openpyxl : 30.07
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment