Convert xls to CSV
#!/usr/local/bin/python | |
import openpyxl | |
from openpyxl.utils.exceptions import InvalidFileException | |
import tornado.options | |
import os.path | |
import csv | |
import sys | |
import xlrd | |
import datetime | |
def _utf8(s): | |
"""encode a unicode string as utf-8""" | |
if isinstance(s, unicode): | |
return s.encode("utf-8") | |
assert isinstance(s, str), "_utf8 expected a str, not %r" % type(s) | |
return s | |
def to_string(v): | |
if v is None: | |
return '' | |
if isinstance(v, (int, long, float)): | |
return str(v) | |
if isinstance(v, datetime.datetime): | |
return v.strftime('%Y-%m-%d') | |
return _utf8(v) | |
if __name__ == "__main__": | |
tornado.options.define("input_file", default=None, type=str) | |
tornado.options.define("worksheet", default=0, type=int) | |
tornado.options.define("skip_header", default=False, type=bool) | |
tornado.options.parse_command_line() | |
o = tornado.options.options | |
assert os.path.exists(o.input_file) | |
w = csv.writer(sys.stdout) | |
try: | |
wb = openpyxl.load_workbook(filename=o.input_file, read_only=True) | |
ws = wb.worksheets[o.worksheet] | |
for i, row in enumerate(ws.rows): | |
if i == 0 and o.skip_header: | |
continue | |
w.writerow([to_string(cell.value) for cell in row]) | |
except InvalidFileException: | |
# retry with xlrd | |
wb = xlrd.open_workbook(filename=o.input_file) | |
# http://xlrd.readthedocs.io/en/latest/api.html | |
ws = wb.sheet_by_index(o.worksheet) | |
for i, row in enumerate(ws.get_rows()): | |
if i == 0 and o.skip_header: | |
continue | |
w.writerow([to_string(cell.value) for cell in row]) | |
sys.stdout.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment