Skip to content

Instantly share code, notes, and snippets.

@unblevable
Last active January 31, 2020 05:27
Show Gist options
  • Save unblevable/a1146c35df674470618a9a7caf94c719 to your computer and use it in GitHub Desktop.
Save unblevable/a1146c35df674470618a9a7caf94c719 to your computer and use it in GitHub Desktop.
Merge .xls files
import os
import sys
import xlrd
import xlwt
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from colorama import Fore, init
from xlutils.copy import copy
from xlutils.filter import XLRDReader, XLWTWriter, process
# Modified xutils.copy copy to preserve style information.
# def copy2 (wb):
# w = XLWTWriter()
# process(XLRDReader(wb, 'unknown.xls'), w)
#
# return w.output[0][1], w.style_list
def expand(path):
'''
Return a list of path(s) to .xls files based on whether the given path is
an .xls file or directory of .xls files.
'''
is_xls = lambda f: f.lower().endswith('.xls')
paths = []
if os.path.isdir(path):
for filename in os.listdir(path):
if is_xls(filename):
paths.append(os.path.join(path, filename))
elif os.path.isfile(path):
if is_xls(path):
paths.append(path)
return paths
def main():
# Initialize colorized output.
init()
## 1. Parse the command-line arguments. -----------------------------------
parser = ArgumentParser(
description='Merge .xls files (with identical columns) into one file.',
# Handle whitespace in the description and epilog
formatter_class=RawDescriptionHelpFormatter,
epilog='''
Examples:
# Merge n number of sheets.
merge_xls.py sheet1.xls sheet2.xls sheet3.xls output.xls
# Merge a sheet along with all .xls files in a directory.
merge_xls.py sheet1.xls other_sheets/ output.xls
# Merge all .xls files in the current directory.
merge_xls.py *.xls output.xls
''',
)
parser.add_argument(
'paths',
help='''
Path(s) to files or directories of .xlsx files. Globs, i.e. *.xlsx, are
supported as well.
''',
nargs='+',
)
parser.add_argument(
'output',
default='output.xls',
help='The merged .xls file',
)
parser.add_argument(
'--header',
default=0,
help='The (one-based) index of the header row',
type=int,
)
args = parser.parse_args()
filenames = [f for paths in [expand(p) for p in args.paths] for f in paths]
## 2. Create a blank output file to write to. -----------------------------
output = args.output
if os.path.exists(output):
print(Fore.RED + f'{output} already exists.', file=sys.stderr)
sys.exit(1)
# output filename, output file extension
ofilename, ofile_extension = os.path.splitext(os.path.basename(output))
if (ofile_extension != '.xls'):
print(Fore.RED + 'Specify an output .xls file.', file=sys.stderr)
sys.exit(1)
# output workbook, output style
# owb, ostyle = copy2(
# xlrd.open_workbook(filename=filenames[0], formatting_info=True),
# )
# output workbook
owb = xlwt.Workbook()
# output worksheet
ows = owb.add_sheet(ofilename)
## 3. Merge the .xls files ------------------------------------------------
header_index = args.header - 1 if args.header > 1 else 1
# Index that keeps track of which row to write to in the output worksheet.
ows_row_index = 0
# Index that keeps track of which row to begin reading from the input
# worksheets.
ws_start_index = 0
for i, f in enumerate(filenames):
wb = xlrd.open_workbook(filename=f)
for j, s in enumerate(wb.sheet_names()):
ws = wb.sheet_by_name(s)
for r in range(ws.nrows):
if (r < ws_start_index):
continue
for c in range(ws.ncols):
if (r == header_index and i == 0 and j == 0):
# Copy the header row over the first time it's
# encountered.
ws_start_index = header_index + 1
# Preserve the source cell's styling.
# style = ostyle[ws.cell_xf_index(r, c)]
ows.write(ows_row_index, c, ws.cell_value(r, c))
ows_row_index += 1
owb.save(output)
print(
Fore.GREEN +
f'Successfully merged {len(filenames)} spreadsheet(s) into {output}',
)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment