Skip to content

Instantly share code, notes, and snippets.

@duchenpaul
Created February 22, 2020 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save duchenpaul/68dba2f0f314f38b42a7ae9017ebf517 to your computer and use it in GitHub Desktop.
Save duchenpaul/68dba2f0f314f38b42a7ae9017ebf517 to your computer and use it in GitHub Desktop.
Transfer data between databases using pandas dataframe
from tqdm import tqdm
import pandas as pd
from sqlalchemy import create_engine, MetaData
SOURCE_TABLE_NAME_LIST = ['individual_tagging_sd', ]
def chunker(seq, size):
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
def sync_table(SOURCE_TABLE_NAME):
# print('Syncing {}'.format(SOURCE_TABLE_NAME))
TARGET_TABLE_NAME = SOURCE_TABLE_NAME
USER = 'mysql'
PASSWORD = 'mysql1'
SERVER = 'localhost'
DATABASE = 'customer_selection'
# driverString = '?driver=SQL+Server+Native+Client+11.0'
driverString = ''
engine = create_engine('mysql+mysqlconnector://{user}:{password}@{server}/{database}'.format(
user=USER, password=PASSWORD, server=SERVER, database=DATABASE,) + driverString)
metadata = MetaData(engine)
with engine.connect() as conn:
with conn.begin():
df = pd.read_sql_table(SOURCE_TABLE_NAME, conn)
# Dump to file for transferring
df.to_pickle("./dummy.pkl", compression='gzip')
print('Dataframe dumped')
# load file
df = pd.read_pickle("./dummy.pkl")
print('Dataframe loaded')
USER = 'postgres'
PASSWORD = 'raspberry'
SERVER = 'localhost'
DATABASE = 'customer_selection'
engine = create_engine('postgresql+psycopg2://{user}:{password}@{server}/{database}'.format(
user=USER, password=PASSWORD, server=SERVER, database=DATABASE,) + driverString)
chunksize = int(len(df) / 10) # 10%
with engine.connect() as conn:
with conn.begin(), tqdm(total=len(df)) as pbar:
for i, cdf in enumerate(chunker(df, chunksize)):
replace = "replace" if i == 0 else "append"
cdf.to_sql(con=engine, name=TARGET_TABLE_NAME, if_exists=replace, index=False)
pbar.update(chunksize)
print('Table {} loaded'.format(TARGET_TABLE_NAME))
if __name__ == '__main__':
for i in SOURCE_TABLE_NAME_LIST:
sync_table(i)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment