Skip to content

Instantly share code, notes, and snippets.

@skchronicles
Created April 21, 2020 21:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skchronicles/a845af3100f992d3250bf44689767c92 to your computer and use it in GitHub Desktop.
Save skchronicles/a845af3100f992d3250bf44689767c92 to your computer and use it in GitHub Desktop.
xlsx to tsv writer
from __future__ import print_function
import pandas as pd
import sys
usage = '''\
USAGE:
python xlsx_reader.py input.xlsx output_file_prefix [-h]
Positional Arguments:
[1] Type [File]: Input XLSX filename to convert to TSV. Note: XLSX files with multiple worksheets are separated into multiple TSV files.
[2] Type [String]: Prefix of output filename. Note: An additional suffix containing the worksheet name will also be appended.
Example: Note 'huPlasma_36-plex.xlsx' contains two worksheets: Results and QC
$ python xlsx_reader.py huPlasma_36-plex.xlsx test
# Returns TSV file for each worksheet
> test_Results.txt
> test_QC.txt
Requirements:
numpy==1.18.3
pandas==1.0.3
python-dateutil==2.8.1
pytz==2019.3
six==1.14.0
xlrd==1.2.0
'''
def write(inputfile, outprefix):
'''Takes input XLSX filename and output file prefix to create multiple TSV files for each worksheet in the XSLX file.'''
sheet2df = pd.read_excel(inputfile, sheet_name=None, header=None)
print('Found Worksheets:')
for sheet, data in sheet2df.items():
print(" - {}".format(sheet))
outfile = sheet2df[sheet]
outfile.to_csv("{}_{}.txt".format(outprefix, sheet),sep='\t',encoding='utf-8', header=None, index=False)
def main():
# Parse args
if '-h' in sys.argv or '--help' in sys.argv:
print(usage)
sys.exit()
try:
filename = sys.argv[1] # Input xlsx file
ofn = sys.argv[2] # Output filename prefix
except IndexError:
print(usage)
sys.exit()
write(filename, ofn)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment