Skip to content

Instantly share code, notes, and snippets.

@slhck
Last active April 10, 2017 14:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slhck/c34b1b1f2a944e53be2ade4712b7e762 to your computer and use it in GitHub Desktop.
Save slhck/c34b1b1f2a944e53be2ade4712b7e762 to your computer and use it in GitHub Desktop.
XLS(X) to CSV Converter
#/usr/bin/env python
# -*- coding: utf-8 -*-
"""XLS to CSV converter
Converts XLS(X) worksheets to CSV files
Author: Werner Robitza <werner.robitza@gmail.com>
Requirements:
pip install openpyxl docopt
Usage:
xls_to_csv.py <input> [-o <output>] [-w <worksheet-index>]...
[-t] [-p] [-s <separator>]
[-v] [-f] [-n]
Options:
<input> Input file (.xls or .xlsx)
-o --output <output> Output folder (default: working directory)
-t --target-dir Set output folder to input file dir as target
-w --worksheet-index <worksheet-index> Indices of worksheets to export, will otherwise export all. Starts with 1.
-p --prefix Use input file prefix before output filename
-s --separator <separator> CSV separator [default: ,]
-v --verbose Enable verbose mode
-f --force Force overwriting
-n --dry-run Dry run (no actual commands will be run)
Examples:
xls_to_csv.py file.xlsx /path/to/output
xls_to_csv.py file.xlsx -w 1 -w 2 -f
History:
2017/04/10: Update to use new openpyxl syntaxa
2016/06/29: Initial version
"""
from docopt import docopt
import openpyxl
import csv
import os
args = dict()
def csv_from_excel(arguments):
workbook = openpyxl.load_workbook(arguments['<input>'])
for index, worksheet in enumerate(workbook.worksheets):
# skip filtered worksheets
if arguments['--worksheet-index'] \
and (index + 1) not in [int(i) for i in arguments['--worksheet-index']]: continue
# skip empty worksheet
if worksheet.max_row == 0: continue
# set output directory
if arguments['--output']:
output_dir = arguments['--output']
else:
output_dir = '.'
if arguments['--target-dir']:
output_dir = os.path.dirname(os.path.realpath(arguments['<input>']))
# prepend file prefix
if arguments['--prefix']:
output_file_name = os.path.splitext(os.path.basename(arguments['<input>']))[0] + '-' + worksheet.title + '.csv'
else:
output_file_name = worksheet.title + '.csv'
# write to output dir
output_file_path = os.path.join(output_dir, output_file_name)
# skip existing files
if os.path.isfile(output_file_path) and not arguments['--force']:
print("[warning] file exists: " + output_file_path + ". Use -f/--force to overwrite.")
continue
# print and continue if dry run
if arguments['--dry-run']:
print("[info] would write to: " + output_file_path)
continue
if arguments['--verbose']: print("[info] writing to " + output_file_path)
# write to output
output_file = open(output_file_path, 'wb')
wr = csv.writer(output_file, quoting=csv.QUOTE_ALL, delimiter=arguments['--separator'])
for row in worksheet.iter_rows():
wr.writerow(
[ unicode(cell.value).encode("utf-8") for cell in row ]
)
output_file.close()
if __name__ == "__main__":
arguments = docopt(__doc__, version='XLS to CSV Converter v0.1')
csv_from_excel(arguments)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment