Last active
April 10, 2017 14:52
-
-
Save slhck/c34b1b1f2a944e53be2ade4712b7e762 to your computer and use it in GitHub Desktop.
XLS(X) to CSV Converter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
"""XLS to CSV converter | |
Converts XLS(X) worksheets to CSV files | |
Author: Werner Robitza <werner.robitza@gmail.com> | |
Requirements: | |
pip install openpyxl docopt | |
Usage: | |
xls_to_csv.py <input> [-o <output>] [-w <worksheet-index>]... | |
[-t] [-p] [-s <separator>] | |
[-v] [-f] [-n] | |
Options: | |
<input> Input file (.xls or .xlsx) | |
-o --output <output> Output folder (default: working directory) | |
-t --target-dir Set output folder to input file dir as target | |
-w --worksheet-index <worksheet-index> Indices of worksheets to export, will otherwise export all. Starts with 1. | |
-p --prefix Use input file prefix before output filename | |
-s --separator <separator> CSV separator [default: ,] | |
-v --verbose Enable verbose mode | |
-f --force Force overwriting | |
-n --dry-run Dry run (no actual commands will be run) | |
Examples: | |
xls_to_csv.py file.xlsx /path/to/output | |
xls_to_csv.py file.xlsx -w 1 -w 2 -f | |
History: | |
2017/04/10: Update to use new openpyxl syntaxa | |
2016/06/29: Initial version | |
""" | |
from docopt import docopt | |
import openpyxl | |
import csv | |
import os | |
args = dict() | |
def csv_from_excel(arguments): | |
workbook = openpyxl.load_workbook(arguments['<input>']) | |
for index, worksheet in enumerate(workbook.worksheets): | |
# skip filtered worksheets | |
if arguments['--worksheet-index'] \ | |
and (index + 1) not in [int(i) for i in arguments['--worksheet-index']]: continue | |
# skip empty worksheet | |
if worksheet.max_row == 0: continue | |
# set output directory | |
if arguments['--output']: | |
output_dir = arguments['--output'] | |
else: | |
output_dir = '.' | |
if arguments['--target-dir']: | |
output_dir = os.path.dirname(os.path.realpath(arguments['<input>'])) | |
# prepend file prefix | |
if arguments['--prefix']: | |
output_file_name = os.path.splitext(os.path.basename(arguments['<input>']))[0] + '-' + worksheet.title + '.csv' | |
else: | |
output_file_name = worksheet.title + '.csv' | |
# write to output dir | |
output_file_path = os.path.join(output_dir, output_file_name) | |
# skip existing files | |
if os.path.isfile(output_file_path) and not arguments['--force']: | |
print("[warning] file exists: " + output_file_path + ". Use -f/--force to overwrite.") | |
continue | |
# print and continue if dry run | |
if arguments['--dry-run']: | |
print("[info] would write to: " + output_file_path) | |
continue | |
if arguments['--verbose']: print("[info] writing to " + output_file_path) | |
# write to output | |
output_file = open(output_file_path, 'wb') | |
wr = csv.writer(output_file, quoting=csv.QUOTE_ALL, delimiter=arguments['--separator']) | |
for row in worksheet.iter_rows(): | |
wr.writerow( | |
[ unicode(cell.value).encode("utf-8") for cell in row ] | |
) | |
output_file.close() | |
if __name__ == "__main__": | |
arguments = docopt(__doc__, version='XLS to CSV Converter v0.1') | |
csv_from_excel(arguments) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment