Skip to content

Instantly share code, notes, and snippets.

@jehiah
Last active November 15, 2019 14:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jehiah/e228375470fbf08f6a76e2dc359bc8ff to your computer and use it in GitHub Desktop.
Save jehiah/e228375470fbf08f6a76e2dc359bc8ff to your computer and use it in GitHub Desktop.
Convert xls to CSV
#!/usr/local/bin/python
import openpyxl
from openpyxl.utils.exceptions import InvalidFileException
import tornado.options
import os.path
import csv
import sys
import xlrd
import datetime
def _utf8(s):
"""encode a unicode string as utf-8"""
if isinstance(s, unicode):
return s.encode("utf-8")
assert isinstance(s, str), "_utf8 expected a str, not %r" % type(s)
return s
def to_string(v):
if v is None:
return ''
if isinstance(v, (int, long, float)):
return str(v)
if isinstance(v, datetime.datetime):
return v.strftime('%Y-%m-%d')
return _utf8(v)
if __name__ == "__main__":
tornado.options.define("input_file", default=None, type=str)
tornado.options.define("worksheet", default=0, type=int)
tornado.options.define("skip_header", default=False, type=bool)
tornado.options.parse_command_line()
o = tornado.options.options
assert os.path.exists(o.input_file)
w = csv.writer(sys.stdout)
try:
wb = openpyxl.load_workbook(filename=o.input_file, read_only=True)
ws = wb.worksheets[o.worksheet]
for i, row in enumerate(ws.rows):
if i == 0 and o.skip_header:
continue
w.writerow([to_string(cell.value) for cell in row])
except InvalidFileException:
# retry with xlrd
wb = xlrd.open_workbook(filename=o.input_file)
# http://xlrd.readthedocs.io/en/latest/api.html
ws = wb.sheet_by_index(o.worksheet)
for i, row in enumerate(ws.get_rows()):
if i == 0 and o.skip_header:
continue
w.writerow([to_string(cell.value) for cell in row])
sys.stdout.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment