Skip to content

Instantly share code, notes, and snippets.

@shashfrankenstien
Created September 17, 2019 20:16
Show Gist options
  • Save shashfrankenstien/6ded3e92106ac1545e6776f79eb204aa to your computer and use it in GitHub Desktop.
Save shashfrankenstien/6ded3e92106ac1545e6776f79eb204aa to your computer and use it in GitHub Desktop.
Pandas DataFrame to MySQL using pymysql
import pymysql
import pandas as pd
HOST = ''
USER = ''
PASSWORD = ''
PORT = 3306
def insert_pandas(df, table, duplicates='update'):
conn = pymysql.connect(
host=HOST,
user=USER,
password=PASSWORD,
port=PORT,
autocommit=True
)
try:
with conn.cursor() as cur:
cur.execute('''select column_name from INFORMATION_SCHEMA.COLUMNS
where concat(table_schema, ".", table_name) = "{}"'''.format(table))
db_cols = [c[0] for c in cur.fetchall()]
print(db_cols)
fields = list(df)
if sorted(fields) != sorted(db_cols):
print(list(df), db_cols, sep="\n")
raise ValueError("DataFrame column names don't match")
update = duplicates.lower().strip()=='update'
insert_stmt = '''INSERT{ignore} INTO {table} ({cols}) VALUES ({placeholders})'''.format(
table=table,
cols=','.join(fields),
placeholders=','.join(['%s']*len(fields)),
ignore=' IGNORE' if not update else ''
)
if update:
update_string = ', '.join(['{f}=VALUES({f})'.format(f=f) for f in fields])
duplicates_postfix = ''' ON DUPLICATE KEY UPDATE {update_string}'''.format(update_string=update_string)
insert_stmt += duplicates_postfix
# print(insert_stmt)
df = df.replace(r'^\s*$', pd.np.nan, regex=True)
df = df.where((pd.notnull(df)), None)
print(df.head())
values = [tuple(x) for x in df[fields].values]
print("Inserting with '{}' {} rows into {}..".format(duplicates, len(df), table))
cur.executemany(sql=insert_stmt, args=values)
finally:
conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment