Skip to content

Instantly share code, notes, and snippets.

@jjjake
Last active March 19, 2022 15:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jjjake/c8d7b81e5d009c12c1850c7fae0c3c65 to your computer and use it in GitHub Desktop.
Save jjjake/c8d7b81e5d009c12c1850c7fae0c3c65 to your computer and use it in GitHub Desktop.
This script converts a Microsoft Excel spreadsheet to a UTF-8 CSV file.
#!/usr/bin/env python
"""Convert a Microsoft Excel spreadsheet to a UTF-8 csv.
Usage:
# Make sure requirements are installed.
$ sudo pip install xlrd backports.csv
# Run script.
$ python convert_xls_to_utf8_csv.py <spreadsheet>
The CSV will be saved to the same path as the original file, but with
'.csv' appended. For example, '~/spreadsheets/foo.xlsx' would be saved
to '~/spreadsheets/foo.xlsx.csv'.
"""
import sys
import csv
import json
import io
import xlrd
import backports.csv as csv
def convert_xls_to_dict(filepath, sheet_index=0):
    """Convert one sheet of a Microsoft Excel workbook to a list of dicts.

    The first row of the sheet is treated as the header. Each header cell
    becomes a dict key after dropping everything up to and including the
    first ':' (if there is one) and lowercasing what remains. Every
    following row yields one dict mapping those keys to that row's cell
    values.

    Args:
        filepath: Path of the workbook, passed to ``xlrd.open_workbook``.
        sheet_index: Zero-based index of the worksheet to read.

    Returns:
        A list with one dict per data row (the header row is excluded).
        The list is empty when the sheet has no rows or only a header row.
        If two headers normalize to the same key, the value from the
        rightmost of those columns is the one kept.
    """
    workbook = xlrd.open_workbook(filepath)
    worksheet = workbook.sheet_by_index(sheet_index)

    # Without a header row there is nothing to key the data on.
    if worksheet.nrows == 0:
        return []

    # Normalize the header once up front; the keys are identical for every
    # data row, so there is no need to recompute them per row.
    keys = []
    for col in range(worksheet.ncols):
        header = worksheet.cell_value(0, col)
        # Numeric, date and boolean header cells come back as float/int
        # rather than text; render them as text so they can be normalized
        # like any other header instead of raising AttributeError.
        if not hasattr(header, 'split'):
            header = u'{}'.format(header)
        keys.append(header.split(':', 1)[-1].lower())

    return [
        {key: worksheet.cell_value(row, col) for col, key in enumerate(keys)}
        for row in range(1, worksheet.nrows)
    ]
if __name__ == '__main__':
    # Without an argument, sys.argv[-1] is this script itself; bail out with
    # a usage message instead of trying to parse the script as a workbook.
    if len(sys.argv) < 2:
        sys.exit('Usage: python convert_xls_to_utf8_csv.py <spreadsheet>')

    # The spreadsheet path is the last command-line argument.
    spreadsheet_path = sys.argv[-1]

    # Parse the workbook before touching the output path, so a failed
    # conversion does not leave behind an empty (or truncated) CSV file.
    rows = convert_xls_to_dict(spreadsheet_path)

    # The CSV is written next to the input, with '.csv' appended to its name.
    with io.open('{}.csv'.format(spreadsheet_path), 'w', newline='',
                 encoding='utf-8') as fh:
        writer = csv.writer(fh)
        for i, row in enumerate(rows):
            # Write header if first row.
            if i == 0:
                writer.writerow(row.keys())
            writer.writerow(row.values())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment