Skip to content

Instantly share code, notes, and snippets.

@chancyk
Last active August 24, 2023 00:24
Show Gist options
  • Save chancyk/4ed5ca67693cd9d1f40f to your computer and use it in GitHub Desktop.
Save chancyk/4ed5ca67693cd9d1f40f to your computer and use it in GitHub Desktop.
Python context manager for quickly writing tabular data to an Excel worksheet using xlwt or openpyxl.
import os
import re
import tempfile
from subprocess import Popen
from collections import OrderedDict
import xlrd
import xlwt
import xlutils
import openpyxl
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment, Font
__all__ = ['WorksheetWriter', 'Excel', 'NewExcel', 'ExistingExcel', 'TempExcel',
'to_excel', 'to_excel_temp', 'to_xl', 'to_xl_tmp']
EXCEL = r'EXCEL.EXE'
EXCEL2003_32 = r'C:\Program Files (x86)\Microsoft Office\Office11\EXCEL.EXE'
EXCEL2007_32 = r'C:\Program Files (x86)\Microsoft Office\Office12\EXCEL.EXE'
EXCEL2007_64 = r'C:\Program Files\Microsoft Office\Office12\EXCEL.EXE'
EXCEL2010_32 = r'C:\Program Files (x86)\Microsoft Office\Office14\EXCEL.EXE'
EXCEL2010_64 = r'C:\Program Files\Microsoft Office\Office14\EXCEL.EXE'
EXCEL_PATHS = [EXCEL2010_64, EXCEL2010_32, EXCEL2007_64,
EXCEL2007_32, EXCEL2003_32, EXCEL]
DEFAULT_DOC_TYPE = '.xlsx'
class WorksheetWriter(object):
"""Writes tabular data by row or by table to a worksheet. Additional sheets
are automatically created for the XLS format since there is a 65536 row
limit."""
def __init__(self, workbook, sheet):
self.workbook = workbook
self.sheet = sheet
self.current_row = 0
self.overflow_idx = 2
self.num_cols = None
self.has_header = False
self._is_xlsx = isinstance(self.sheet, openpyxl.worksheet.Worksheet)
def _write(self, row, col, value):
"""Provide consistent write interface between xlwt and openpyxl."""
if self._is_xlsx:
c = self.sheet.cell(row=row+1, column=col+1)
c.value = value
else:
self.sheet.write(row, col, value)
def writeheader(self, values):
"""Write the header using writerow() and track that a header has been
written.
"""
self.has_header = True
self.writerow(values)
def writerow(self, values):
"""Write data by rows. For XLS files, a new sheet with a _[#] suffix
will be created for every 65536 rows.
"""
if self.num_cols is None:
self.num_cols = len(values)
for col, value in enumerate(values):
try:
self._write(self.current_row, col, value)
except ValueError as e:
if e.message.find('65536') >= 0:
new_sheet = u'%s_%s' % (self.sheet.name, self.overflow_idx)
self.sheet = self.workbook.create_sheet(title=new_sheet)
self.current_row = 0
self.overflow_idx += 1
self._write(self.current_row, col, value)
else:
raise
self.current_row += 1
def writerows(self, rows):
for row in rows:
self.writerow(row)
def apply_style(self):
"""Bold and center the header, and apply an auto filter."""
if self._is_xlsx == False:
raise Exception('Filters only work for XLSX docs.')
if self.num_cols is None:
raise Exception('A row must be written first.')
font = Font(bold=True)
alignment = Alignment(horizontal='center')
header_range = "A1:%s1" % get_column_letter(self.num_cols)
filter_range = "A1:%s%s" % (get_column_letter(self.num_cols),
self.current_row)
self.sheet.auto_filter.ref = filter_range
for row in self.sheet[header_range]:
for c in row:
c.font = font
c.alignment = alignment
class Excel(object):
"""A convience wrapper around xlwt and openpyxl writing facilities to create
a simple interface for writing tabular data to Excel for both the XLS and
XLSX formats.
Notes
-----
+ Sheetnames are mapped as Python friendly names to the Excel object.
+ NewExcel, New, ExistingExcel, and Existing are provided as wrappers
around Excel instead of passing the use_existing parameter.
+ An Excel installation is searched by looking at default installation
paths from newer to older versions of Excel and from 64-bit to 32-bit,
and finally a PATH relative attempt is made.
+ Temp, TempExcel, to_excel_temp, and to_xl_tmp are provided for quickly
writing tabular data to an Excel temporary file without having to specify
a filename.
+ For XLSX files, writeheader() will apply an auto filter, and bold and
center the header row.
Usage
-----
>>> with Excel("test.xlsx", ["Data", "Summary"]) as workbook:
... # Access the sheets using the Python friendly method mapping.
... workbook.data.writeheader(['A', 'B', 'C'])
... workbook.data.writerows([[1,2,3],[4,5,6]])
... # Access the sheets in the order they were applied by index.
... workbook[0].writerows([7,8,9])
... # Quickly access the first sheet, equivalent to workbook[0] above.
... workbook.writerows([7,8,9])
... # Access the sheet using the actual sheetname provided.
... workbook["Summary"].writerows([5,7,9])
>>> # Write data to a temporary file and open it in Excel, deleted on exit.
>>> with Temp() as wb:
... wb.writeheader(['A', 'B', 'C'])
... wb.writerows([[1,2,3],[4,5,6]])
>>> # Function version of the above.
>>> to_excel_temp([[1,2,3],[4,5,6]], header=['A', 'B', 'C'])
>>> # Creates default 'Sheet1'. workbook.save() must be called. Fails if
>>> # test.xlsx already exists.
>>> workbook = NewExcel('test.xlsx')
>>> workbook.sheet1.writerow([1,2,3])
>>> workbook.save()
"""
def __init__(self, file_or_path, sheetnames=None, use_existing=False,
encoding='utf-8', excel_paths=EXCEL_PATHS,
doc_type=DEFAULT_DOC_TYPE):
"""
Parameters
----------
file_or_path : str or file-like
The path to an Excel file to use or create, or a stream to write to.
sheetnames : str or list
The name of the sheet or a list of sheet names.
use_existing : bool
Use an existing file if it already exists. Raises an exception if the
file already exists and use_existing is False.
excel_paths : list of paths
The list of paths to try in order for the location of EXCEL.EXE
doc_type : str
The Excel document type to use. '.xls' or '.xlsx'
"""
self._file_or_path = file_or_path
self._use_existing = use_existing
self._encoding = encoding
self._excel_paths = excel_paths
self._doc_type = doc_type
self._pre_existing = False
self._sheets = []
self._worksheet_writers = OrderedDict()
self._open_workbook()
if isinstance(sheetnames, str):
sheetnames = [sheetnames]
else:
sheetnames = sheetnames or ['Sheet1']
self.add_sheets(sheetnames)
def _to_python_friendly(self, s):
"""Convert to Python friendly version of the sheetname.
+ Strip leading and trailing spaces
+ Spaces to single spaces.
+ Hyphens and spaces to underscore.
+ Remove invalid characters.
+ Remove leading characters until a letter is found.
+ Convert to lowercase.
+ Multiple underscores to single underscore.
"""
# Strip leading and trailing spaces.
s = s.strip()
# Spaces to single space.
s = re.sub(' +', ' ', s)
# Hyphens and spaces to underscore.
s = re.sub(r'[\- ]', '_', s)
# Remove invalid characters.
s = re.sub('[^0-9a-zA-Z_]', '', s)
# Remove leading characters until we find a letter.
s = re.sub('^[^a-zA-Z]+', '', s)
# Lowercase.
s = s.lower()
# Multiple underscores to one.
s = re.sub('_+', '_', s)
return s
def __getitem__(self, index):
"""Return the WorksheetWriters in the order they were added to the
workbook.
"""
if isinstance(index, int):
worksheet_writer = self._worksheet_writers.values()[index]
elif isinstance(index, str):
worksheet_writer = self._worksheet_writers[index]
else:
raise ValueError("%s is not a valid index." % index)
return worksheet_writer
def add_sheet(self, sheetname):
"""Add a new sheet and WorksheetWriter, and append the Python friendly
sheetnames to this object.
"""
if sheetname in self._sheets:
raise ValueError("Worksheet '%s' already exists")
self._sheets.append(sheetname)
sheet = self.workbook.create_sheet(title=sheetname)
worksheet_writer = WorksheetWriter(self.workbook, sheet)
self._worksheet_writers[sheetname] = worksheet_writer
method_name = self._to_python_friendly(sheetname)
if method_name in dir(self):
raise ValueError(
"Worksheet name '%s' collides with an existing method. "\
"Choose a different worksheet name." % method_name
)
else:
self.__dict__[method_name] = worksheet_writer
def add_sheets(self, sheetnames):
"""Add new WorksheetWriters for the list of sheetnames."""
for sheetname in sheetnames:
self.add_sheet(sheetname)
@property
def has_header(self):
return self[0].has_header
@has_header.setter
def has_header(self, x):
self[0].has_header = x
def writeheader(self, values):
"""Convenience wrapper for calling writeheader() on the first sheet,
e.g.:
>>> workbook[0].writeheader([1,2,3])
"""
self[0].writeheader(values)
def writerow(self, values):
"""Convenience wrapper for calling writerow() on the first sheet, e.g.:
>>> workbook[0].writerow([1,2,3])
"""
self[0].writerow(values)
def writerows(self, rows):
"""Convenience wrapper for calling writerows() on the first sheet, e.g.:
>>> workbook[0].writerows([1,2,3])
"""
self[0].writerows(rows)
@property
def _use_xlsx(self):
return self._doc_type == '.xlsx'
def _xl_open_workbook(self):
"""Opens a workbook using either xlrd or openpyxl, and monkey patches
the resulting workbook objects to have the same interface for our
purposes.
"""
if self._use_xlsx:
wb = openpyxl.load_workbook(self._file_or_path)
wb.__dict__['sheet_by_name'] = wb.get_sheet_by_name
else:
wb = xlutils.copy(xlrd.open_workbook(self._file_or_path))
wb.__dict__['create_sheet'] = lambda title: wb.add_sheet(title)
return wb
def _xl_new_workbook(self):
"""Opens a new workbook using either xlrd or openpyxl, and monkey
patches the resulting workbook objects to have the same interface for
our purposes.
"""
if self._use_xlsx:
wb = openpyxl.Workbook(encoding=self._encoding)
wb.__dict__['sheet_by_name'] = wb.get_sheet_by_name
wb.remove_sheet(wb.get_sheet_by_name('Sheet'))
else:
wb = xlwt.Workbook(encoding=self._encoding)
wb.__dict__['create_sheet'] = lambda title: wb.add_sheet(title)
return wb
def _open_workbook(self):
"""Return either a xlwt or openpyxl Workbook object that has been monkey
patched to have a consistent interface for the necessary methods.
"""
if isinstance(self._file_or_path, str):
if os.path.exists(self._file_or_path):
if self._use_existing:
workbook = self._xl_open_workbook()
for sheetname in self._sheets:
try:
workbook.sheet_by_name(sheetname)
except:
pass
else:
raise IOError("Worksheet '%s' already exists." \
% sheetname)
else:
raise IOError("The file '%s' already exists." \
% self._file_or_path)
else:
workbook = self._xl_new_workbook()
else:
workbook = self._xl_new_workbook()
self.workbook = workbook
def save(self):
"""Save the workbook. Also applies the final style to XLSX files."""
if self._use_xlsx:
for sheet in self._sheets:
worksheet_writer = self._worksheet_writers[sheet]
if worksheet_writer.has_header:
worksheet_writer.apply_style()
if isinstance(self._file_or_path, str):
if os.path.exists(self._file_or_path):
os.remove(self._file_or_path)
self.workbook.save(self._file_or_path)
else:
self.workbook.save(self._file_or_path)
self._file_or_path.close()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
if exc_type == None:
self.save()
class New(Excel):
"""A convenience wrapper for Excel(..., use_existing=False)."""
def __init__(self, file_or_path, sheetname=None):
super(New, self).__init__(file_or_path, sheetname, use_existing=False)
NewExcel = New
class Existing(Excel):
"""A convenience wrapper for Excel(..., use_existing=True)."""
def __init__(self, file_or_path, sheetname=None):
super(Existing, self).__init__(file_or_path, sheetname,
use_existing=True)
ExistingExcel = Existing
class Temp(Excel):
"""Passes a temp file to Excel() and spawns the resulting workbook in the
EXCEL.EXE application.
"""
def __init__(self, delete=True, doc_type=DEFAULT_DOC_TYPE):
self.delete = delete
self.doc_type = doc_type
self.tempfile = \
tempfile.NamedTemporaryFile(suffix=self.doc_type, delete=False)
super(Temp, self).__init__(self.tempfile.file, doc_type=self.doc_type)
def __exit__(self, exc_type, exc_value, traceback):
super(Temp, self).__exit__(exc_type, exc_value, traceback)
tempfile_path = os.path.abspath(self.tempfile.name)
self.tempfile.close()
p = None
for path in self._excel_paths:
try:
p = Popen([path, tempfile_path])
except:
pass
else:
break
if p:
print '... waiting for Excel to exit ...'
p.wait()
if os.path.exists(tempfile_path) and self.delete:
os.remove(tempfile_path)
else:
if os.path.exists(tempfile_path) and self.delete:
os.remove(tempfile_path)
raise Exception("Excel could not be found.")
TempExcel = Temp
def to_excel(rows, file_or_path,
header=None, sheetname=None, use_existing=False):
"""A convenience wrapper around Excel to create a workbook with a function
rather than with a context manager.
"""
with Excel(file_or_path, sheetname, use_existing) as wb:
if header is True:
wb[sheetname].has_header = header
elif header:
wb[sheetname].writeheader(header)
wb[sheetname].writerows(rows)
to_xl = to_excel
def to_excel_temp(rows, header=None):
"""A convenience wrapper around Temp to create a workbook with a function
rather than with a context manager. Creates a temporary file and opens the
resulting file in the EXCEL.EXE application.
"""
with Temp() as wb:
if header is True:
wb.has_header = header
elif header:
wb.writeheader(header)
wb.writerows(rows)
to_xl_tmp = to_excel_temp
if __name__ == '__main__':
to_excel_temp([['A', 'B', 'C'], [1,2,3]], True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment