Skip to content

Instantly share code, notes, and snippets.

@aschleg
Last active March 20, 2022 20:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save aschleg/09cca5317a7de3e6a1e8 to your computer and use it in GitHub Desktop.
Save aschleg/09cca5317a7de3e6a1e8 to your computer and use it in GitHub Desktop.
Merge first worksheet in all Excel workbooks in a folder
from xlwings import Workbook, Range
import pandas as pd
import os
import re
# Script to merge a folder containing Excel workbooks into a single workbook.
# The folder should only contain Excel workbooks and must all either be in csv, xls or xlsx format
# To run, open the command prompt and enter the command python Merge_Excel_Workbooks.py
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
function to load data into Excel without overloading memory
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def chunk_df(df, sheet, startcell, chunk_size=1000):
if len(df) <= (chunk_size + 1):
Range(sheet, startcell, index=False, header=True).value = df
else:
Range(sheet, startcell, index=False).value = list(df.columns)
c = re.match(r"([a-z]+)([0-9]+)", startcell[0] + str(int(startcell[1]) + 1), re.I)
row = c.group(1)
col = int(c.group(2))
for chunk in (df[rw:rw + chunk_size] for rw in
range(0, len(df), chunk_size)):
Range(sheet, row + str(col), index=False, header=False).value = chunk
col += chunk_size
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Script
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Prompt to enter the folder path.
# Only Excel Workbooks should be included in the folder path and all should either be in csv, xls or xlsx format
workbook_folder = raw_input('Enter folder path of workbooks to merge: ')
workbook_folder = os.path.normpath(workbook_folder)
passback_templates = os.listdir(workbook_folder)
temp_to_merge = []
for i in passback_templates:
temp_to_merge.append(workbook_folder + '/' + str(i))
merged_workbooks = pd.DataFrame()
# 0 in the read_excel method refers to the index of the sheet. The first
# sheet in the workbook has a 0 index.
for sheet in temp_to_merge:
tab = raw_input('Enter worksheet name to merge: ')
if sheet[:-4] == '.csv':
workbook = pd.read_csv(sheet, index_col=None, na_values=[0])
merged_workbooks = merged_workbooks.append(workbook)
else:
workbook = pd.read_excel(sheet, tab, index_col=None, na_values=[0])
merged_workbooks = merged_workbooks.append(workbook)
merged_workbooks.fillna(0, inplace = True)
# A new workbook will open and copy the merged data.
wb = Workbook()
chunk_df(merged_workbooks, 'Sheet1', 'A1')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment