Skip to content

Instantly share code, notes, and snippets.

@frainfreeze
Last active May 11, 2023 13:08
Show Gist options
  • Save frainfreeze/ba1612249293efc5bfb1e4c0cd549627 to your computer and use it in GitHub Desktop.
Save frainfreeze/ba1612249293efc5bfb1e4c0cd549627 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
__author__ = "Tomislav Kucar"
__email__ = "kucar.tomislav@gmail.com"
__status__ = "Prototype"
"""Excel sheet to database table extractor
This script allows the user to extract data from excel sheet,
transform it and load it into a database table.
This tool accepts excel (.xls, .xlsx) files for input and any
sqlalchemy supported database for output.
This script requires that `pandas` and `sqlalchemy` be installed
within the Python environment you are running this script in.
This file can also be imported as a module.
"""
import pandas as pd
from sqlalchemy import create_engine
def excel_extract(filepath, sheet, rm_rows, rm_columns, rm_tail, table_name, db):
'''
Extracts data from excel sheet, transforms it and loads it into database table.
Parameters:
filepath (str): path to excel file
sheet (int): excel sheet number
rm_rows (int): number of rows to trim from top
rm_columns (list): list of columns to remove
rm_tail (int): tail length to trim
table_name (str): database table name
db (sqlalchemy.engine.Engine): sqlalchemy engine instance
'''
df = pd.read_excel(filepath, sheet_name = sheet)
df.drop(df.head(rm_rows).index, inplace=True)
df.drop(df.columns[rm_columns], axis=1, inplace=True)
df.drop(df.tail(rm_tail).index, inplace=True)
# bug workaround, see https://github.com/pandas-dev/pandas/issues/34211
df.replace({'%': 'pct'}, regex=True, inplace=True)
df = df.rename(columns=df.iloc[0]).drop(df.index[0]) # set column names
df.to_sql(table_name, db, index=False)
if __name__ == "__main__":
filepath = "SpaceNK_20181222 2.0.xlsx"
table_name = "last_week_report"
engine = create_engine('postgresql://bluset:1234@localhost:5432/foreo_task', echo=False)
excel_extract(filepath, sheet=0, rm_rows=4, rm_columns=[0,1,4], rm_tail=1, table_name=table_name, db=engine)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment