Created
February 4, 2020 17:55
-
-
Save tchiavegatti/2bd39ea87ee2b83b8f2d048c9346eb2f to your computer and use it in GitHub Desktop.
[Combine Excel files] #pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from pathlib import Path | |
import pandas as pd | |
import xlsxwriter | |
# Set filename tag | |
tag = 'client' | |
# Set filepaths | |
data_folder = Path.cwd().joinpath('data').joinpath(tag) | |
output_folder = Path.cwd().joinpath('output') | |
source_files = [f for f in data_folder.glob('*.xls*')] | |
def make_df(files): | |
df = pd.concat( | |
pd.read_excel(f, | |
#sep='c0n\\$ult@xe', | |
header=0, | |
encoding='latin-1', | |
thousands=' ', | |
decimal=',', | |
index_col=False, | |
quotechar='"', | |
#names=names, | |
# skipfooter=1 | |
) for f in files) | |
return df | |
# Create and fix dataframe data types | |
df = make_df(source_files) | |
int_cols = list(df.select_dtypes(include=['int64']).columns) | |
df[int_cols] = df[int_cols].to_string() | |
float_cols = list(df.select_dtypes(include=['float64']).columns) | |
df[float_cols] = df[float_cols].to_string | |
money_cols = [ | |
'Montant des paiements', | |
'Mnt Facture', | |
'Mnt avant taxe', | |
'Mnt Article', | |
'Mnt TPS', | |
'Mnt TVQ', | |
'TPS réclamé', | |
'TVQ Réclamé', | |
'Taxe réclamé' | |
] | |
# Export dataframe to an Excel file | |
def get_column_rng(df, col_name): | |
""" | |
Get the corresponding Excel whole-column range address from the column name. | |
Needs the xlsxwriter.utility package | |
Attributes: | |
df: `Pandas df` | |
A dataframe | |
col_name: `str` | |
Name of the column | |
Example: | |
$col_rng = get_column_letter('Numéro compte') | |
$print(col_rng) | |
'B:B' | |
Returns: | |
Column range as str | |
""" | |
from xlsxwriter.utility import xl_col_to_name | |
cols = list(df.columns) | |
idx = cols.index(col_name) | |
col_letter = xlsxwriter.utility.xl_col_to_name(idx) | |
return(col_letter + ":" + col_letter) | |
def output_to_excel(df, output_file, currency_cols=money_cols): | |
# df: a pandas dataframe | |
# no_match: dataframe with data that didn't match | |
# descriptor: a string to be added to the sheet name | |
#Create writer | |
writer = pd.ExcelWriter(output_file, engine='xlsxwriter', | |
date_format='yyyy/mm/dd', | |
datetime_format='yyyy/mm/dd') | |
workbook = writer.book | |
# Money format | |
money_format = workbook.add_format({'num_format': '0.00'}) | |
col_list = df.columns.to_numpy().tolist() | |
df.to_excel(writer, sheet_name = 'Data', index = False) | |
worksheet = writer.sheets[sheet] | |
# Apply currency format to money columns | |
for col in money_cols: | |
if col in df.columns: | |
col_rng = get_column_rng(df, col) | |
worksheet.set_column(col_rng, None, money_format) | |
# Set workbook properties | |
workbook.set_properties({ | |
'title': 'CHU de Québec', | |
'company': 'Consultaxe', | |
'comments': 'Created with Python, Pandas and XlsxWriter'}) | |
workbook.set_custom_property('Client', 'CHU de Québec') | |
writer.save() | |
return print('Excel file created.') | |
filename = 'spreadsheet.xlsx' | |
output_to_excel(df, filename) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment