Created
October 16, 2012 15:18
-
-
Save joyrexus/3899904 to your computer and use it in GitHub Desktop.
Print the sheet names, or the contents of one worksheet, from an Excel workbook (.xls file) to STDOUT.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
xls2tsv - print the sheet names or the contents of a worksheet from an Excel workbook.

Usage: xls2tsv [options] FILE [FILE ...]

Options:
  -h, --help     show this help message and exit
  --sheet=SHEET  name of worksheet to be printed (default: Sheet1)
  --sheetnames   print the names of all sheets in the Excel file
'''
import sys | |
import xlrd | |
import optparse | |
# Command-line interface: option names, defaults and help text are unchanged.
parser = optparse.OptionParser()
parser.add_option('--sheet', default='Sheet1',
                  help='name of worksheet to be printed')
parser.add_option('--sheetnames', action='store_true', default=False,
                  help='print all sheets in Excel file')

# The text type on both Python 2 (``unicode``) and Python 3 (``str``).
_TEXT_TYPE = type(u"")

# Non-ASCII characters mapped to the ASCII text that replaces them.
# Several keys look like mojibake (bytes decoded with the wrong code page)
# rather than characters anyone typed on purpose -- presumably collected
# from real workbooks; confirm before pruning any of them.
_ASCII_SUBSTITUTES = {
    u"–": u"-",     # en dash
    u"—": u"-",     # em dash
    u"’": u"'",     # right single quotation mark used as apostrophe
    u"稚": u"'t",   # presumably a mis-decoded "’t"
    u"Æ": u"'",     # presumably a mis-decoded apostrophe
    u"é": u"e",
    u"ë": u"e",
    u"â": u"a",
    u"Φ": u"e",     # presumably a mis-decoded accented "e"
    u"Γ": u"a",     # presumably a mis-decoded accented "a"
    u"Θ": u"e",     # presumably a mis-decoded accented "e"
    u"╟": u"",      # unknown origin; dropped
}

# ``translate`` wants code points as keys; build the table once.
_ASCII_TRANSLATION = {ord(char): repl
                      for char, repl in _ASCII_SUBSTITUTES.items()}


def asciify(string):
    '''Return a cell value as a native string with common non-ASCII
    characters replaced by ASCII look-alikes.

    Non-text values (numbers, booleans, ...) are simply passed through
    ``str()``.  Text values have every character listed in
    ``_ASCII_SUBSTITUTES`` replaced; dashes become a plain hyphen.
    Characters that are not in the table are kept: on Python 2 the result
    is then UTF-8 encoded so that it is still a byte string, on Python 3
    the text is returned as is.
    '''
    if not isinstance(string, _TEXT_TYPE):
        return str(string)
    cleaned = string.translate(_ASCII_TRANSLATION)
    if str is bytes:
        # Python 2: callers join and write byte strings, so encode here.
        return cleaned.encode('utf-8', 'replace')
    return cleaned


def _write_line(text):
    '''Write *text* plus a newline to stdout (works on Python 2 and 3).'''
    sys.stdout.write(text + "\n")


def _print_sheet_names(paths):
    '''Print the worksheet names of every workbook in *paths*, one per line.

    Exits with ``bailed on <path>`` if a workbook cannot be opened or its
    sheet names cannot be written.
    '''
    for path in paths:
        try:
            workbook = xlrd.open_workbook(path)
            _write_line("\n".join(workbook.sheet_names()))
        except Exception:
            # ``Exception`` rather than a bare except, so that Ctrl-C and
            # SystemExit are no longer reported as a workbook failure.
            sys.exit('bailed on ' + path)


def _print_sheet(paths, sheet_name):
    '''Print worksheet *sheet_name* of every workbook in *paths* as
    tab-separated lines.

    Exits with a message naming the workbook if it cannot be opened, if
    the worksheet cannot be fetched, or if a row cannot be written.
    '''
    for path in paths:
        try:
            workbook = xlrd.open_workbook(path)
        except Exception:
            sys.exit('bailed on ' + path)
        try:
            sheet = workbook.sheet_by_name(sheet_name)
        except Exception:
            sys.exit("could not open worksheet " + sheet_name + " in " + path)
        for row in range(sheet.nrows):
            values = [asciify(value) for value in sheet.row_values(row)]
            try:
                _write_line("\t".join(values))
            except Exception:
                sys.exit("non-ascii values in " + path)


def main(argv=None):
    '''Parse the command line and print sheet names or sheet contents.

    *argv* is the list of arguments to parse; ``None`` (the default) means
    ``sys.argv[1:]``.  Exits with an error message when no input file is
    given.
    '''
    opts, files = parser.parse_args(argv)
    # Both modes need at least one workbook, so check once up front.
    if not files:
        sys.exit('Need to specify input file(s)')
    if opts.sheetnames:
        _print_sheet_names(files)
    else:
        _print_sheet(files, opts.sheet)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment