Last active
November 15, 2019 14:54
-
-
Save jehiah/e228375470fbf08f6a76e2dc359bc8ff to your computer and use it in GitHub Desktop.
Convert xls to CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
import openpyxl | |
from openpyxl.utils.exceptions import InvalidFileException | |
import tornado.options | |
import os.path | |
import csv | |
import sys | |
import xlrd | |
import datetime | |
def _utf8(s): | |
"""encode a unicode string as utf-8""" | |
if isinstance(s, unicode): | |
return s.encode("utf-8") | |
assert isinstance(s, str), "_utf8 expected a str, not %r" % type(s) | |
return s | |
def to_string(v): | |
if v is None: | |
return '' | |
if isinstance(v, (int, long, float)): | |
return str(v) | |
if isinstance(v, datetime.datetime): | |
return v.strftime('%Y-%m-%d') | |
return _utf8(v) | |
if __name__ == "__main__": | |
tornado.options.define("input_file", default=None, type=str) | |
tornado.options.define("worksheet", default=0, type=int) | |
tornado.options.define("skip_header", default=False, type=bool) | |
tornado.options.parse_command_line() | |
o = tornado.options.options | |
assert os.path.exists(o.input_file) | |
w = csv.writer(sys.stdout) | |
try: | |
wb = openpyxl.load_workbook(filename=o.input_file, read_only=True) | |
ws = wb.worksheets[o.worksheet] | |
for i, row in enumerate(ws.rows): | |
if i == 0 and o.skip_header: | |
continue | |
w.writerow([to_string(cell.value) for cell in row]) | |
except InvalidFileException: | |
# retry with xlrd | |
wb = xlrd.open_workbook(filename=o.input_file) | |
# http://xlrd.readthedocs.io/en/latest/api.html | |
ws = wb.sheet_by_index(o.worksheet) | |
for i, row in enumerate(ws.get_rows()): | |
if i == 0 and o.skip_header: | |
continue | |
w.writerow([to_string(cell.value) for cell in row]) | |
sys.stdout.close() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment