Skip to content

Instantly share code, notes, and snippets.

@martinyung
Last active January 29, 2022 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save martinyung/6d9d126cec4c63d9c95a96129eee3862 to your computer and use it in GitHub Desktop.
Save martinyung/6d9d126cec4c63d9c95a96129eee3862 to your computer and use it in GitHub Desktop.
python etl
def etl(query, source_cnx, target_cnx):
# extract data from source db
source_cursor = source_cnx.cursor()
source_cursor.execute(query.extract_query)
data = source_cursor.fetchall()
source_cursor.close()
# load data into warehouse db
if data:
target_cursor = target_cnx.cursor()
target_cursor.execute("USE {}".format(datawarehouse_name))
target_cursor.executemany(query.load_query, data)
print('data loaded to warehouse db')
target_cursor.close()
else:
print('data is empty')
def etl_process(queries, target_cnx, source_db_config, db_platform):
# establish source db connection
if db_platform == 'mysql':
source_cnx = mysql.connector.connect(**source_db_config)
elif db_platform == 'sqlserver':
source_cnx = pyodbc.connect(**source_db_config)
elif db_platform == 'firebird':
source_cnx = fdb.connect(**source_db_config)
else:
return 'Error! unrecognised db platform'
# loop through sql queries
for query in queries:
etl(query, source_cnx, target_cnx)
# close the source db connection
source_cnx.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment