Last active
January 31, 2020 05:27
-
-
Save unblevable/a1146c35df674470618a9a7caf94c719 to your computer and use it in GitHub Desktop.
Merge .xls files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import xlrd | |
import xlwt | |
from argparse import ArgumentParser, RawDescriptionHelpFormatter | |
from colorama import Fore, init | |
from xlutils.copy import copy | |
from xlutils.filter import XLRDReader, XLWTWriter, process | |
# Modified xutils.copy copy to preserve style information. | |
# def copy2 (wb): | |
# w = XLWTWriter() | |
# process(XLRDReader(wb, 'unknown.xls'), w) | |
# | |
# return w.output[0][1], w.style_list | |
def expand(path): | |
''' | |
Return a list of path(s) to .xls files based on whether the given path is | |
an .xls file or directory of .xls files. | |
''' | |
is_xls = lambda f: f.lower().endswith('.xls') | |
paths = [] | |
if os.path.isdir(path): | |
for filename in os.listdir(path): | |
if is_xls(filename): | |
paths.append(os.path.join(path, filename)) | |
elif os.path.isfile(path): | |
if is_xls(path): | |
paths.append(path) | |
return paths | |
def main(): | |
# Initialize colorized output. | |
init() | |
## 1. Parse the command-line arguments. ----------------------------------- | |
parser = ArgumentParser( | |
description='Merge .xls files (with identical columns) into one file.', | |
# Handle whitespace in the description and epilog | |
formatter_class=RawDescriptionHelpFormatter, | |
epilog=''' | |
Examples: | |
# Merge n number of sheets. | |
merge_xls.py sheet1.xls sheet2.xls sheet3.xls output.xls | |
# Merge a sheet along with all .xls files in a directory. | |
merge_xls.py sheet1.xls other_sheets/ output.xls | |
# Merge all .xls files in the current directory. | |
merge_xls.py *.xls output.xls | |
''', | |
) | |
parser.add_argument( | |
'paths', | |
help=''' | |
Path(s) to files or directories of .xlsx files. Globs, i.e. *.xlsx, are | |
supported as well. | |
''', | |
nargs='+', | |
) | |
parser.add_argument( | |
'output', | |
default='output.xls', | |
help='The merged .xls file', | |
) | |
parser.add_argument( | |
'--header', | |
default=0, | |
help='The (one-based) index of the header row', | |
type=int, | |
) | |
args = parser.parse_args() | |
filenames = [f for paths in [expand(p) for p in args.paths] for f in paths] | |
## 2. Create a blank output file to write to. ----------------------------- | |
output = args.output | |
if os.path.exists(output): | |
print(Fore.RED + f'{output} already exists.', file=sys.stderr) | |
sys.exit(1) | |
# output filename, output file extension | |
ofilename, ofile_extension = os.path.splitext(os.path.basename(output)) | |
if (ofile_extension != '.xls'): | |
print(Fore.RED + 'Specify an output .xls file.', file=sys.stderr) | |
sys.exit(1) | |
# output workbook, output style | |
# owb, ostyle = copy2( | |
# xlrd.open_workbook(filename=filenames[0], formatting_info=True), | |
# ) | |
# output workbook | |
owb = xlwt.Workbook() | |
# output worksheet | |
ows = owb.add_sheet(ofilename) | |
## 3. Merge the .xls files ------------------------------------------------ | |
header_index = args.header - 1 if args.header > 1 else 1 | |
# Index that keeps track of which row to write to in the output worksheet. | |
ows_row_index = 0 | |
# Index that keeps track of which row to begin reading from the input | |
# worksheets. | |
ws_start_index = 0 | |
for i, f in enumerate(filenames): | |
wb = xlrd.open_workbook(filename=f) | |
for j, s in enumerate(wb.sheet_names()): | |
ws = wb.sheet_by_name(s) | |
for r in range(ws.nrows): | |
if (r < ws_start_index): | |
continue | |
for c in range(ws.ncols): | |
if (r == header_index and i == 0 and j == 0): | |
# Copy the header row over the first time it's | |
# encountered. | |
ws_start_index = header_index + 1 | |
# Preserve the source cell's styling. | |
# style = ostyle[ws.cell_xf_index(r, c)] | |
ows.write(ows_row_index, c, ws.cell_value(r, c)) | |
ows_row_index += 1 | |
owb.save(output) | |
print( | |
Fore.GREEN + | |
f'Successfully merged {len(filenames)} spreadsheet(s) into {output}', | |
) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment