Skip to content

Instantly share code, notes, and snippets.

@T99
Last active January 21, 2022 21:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save T99/bf7a7df3a1070cf23d53a6adb041a7f9 to your computer and use it in GitHub Desktop.
Save T99/bf7a7df3a1070cf23d53a6adb041a7f9 to your computer and use it in GitHub Desktop.
Convert XLSB files to CSVs, optionally specifying a sheet
#!/usr/bin/env python
import os
import sys
from pyxlsb import open_workbook
def to_string(s):
if s is None:
return ''
if isinstance(s, (str)):
s = s.replace("'", "''") # Replace single quotes with double-single quotes.
s = s.replace('"', '""') # Replace double quotes with double-double quotes.
s = s.strip()
if "," in s:
s = '"' + s + '"'
return s.encode("ascii")
return repr(s)
def write_sheet_to_csv(csv, workbook, sheet_name):
"""Writes a single worksheet of a workbook out to a CSV file.
:param str csv: A path at which the resulting CSV should be placed.
:param Workbook workbook: The source workbook which should contain the desired worksheet.
:param str sheet_name: The name of the worksheet to convert and write out to the specified CSV file.
"""
with open(csv, "w") as csv_file_handle, workbook.get_sheet(sheet_name) as sheet:
for row in sheet.rows():
csv_file_handle.write(",".join(map(to_string, [r.v for r in row])) + "\n")
def xlsb2csv(xlsb, csv, sheet_name = None):
"""Converts the XLSB file located at the specified path to one or more output CSV files.
If more than a single worksheet is being extracted from the source XLSB, multiple output files will be created,
using the original file name provided by the caller, but having been appended with the name of the given worksheet.
:param str xlsb: A path to the XLSB file to convert.
:param str csv: A base path at which resulting converted CSV files will be placed.
:param str | None sheet_name: An optional name for a specific sheet to extract from the source XLSB. If not provided
(or set to None), all sheets will be converted and output to their own files.
"""
if not os.path.exists(xlsb):
raise Exception("Could not find the input XLSB file at '" + xlsb + "'.")
with open_workbook(xlsb) as workbook:
if sheet_name is not None:
write_sheet_to_csv(csv, workbook, sheet_name)
else:
if len(workbook.sheets) > 1:
base_csv_file_name = os.path.splitext(os.path.basename(csv))[0]
for sheet_name in workbook.sheets:
write_sheet_to_csv(base_csv_file_name + "-" + sheet_name + ".csv", workbook, sheet_name)
else:
write_sheet_to_csv(csv, workbook, workbook.sheets[0])
if __name__ == '__main__':
if len(sys.argv) == 3:
xlsb2csv(sys.argv[1], sys.argv[2])
elif len(sys.argv) == 4:
xlsb2csv(sys.argv[1], sys.argv[2], sys.argv[3])
else:
print("Usage: xlsb2csv <xlsb_input_file> <csv_output_file> <sheet_name?>")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment