Skip to content

Instantly share code, notes, and snippets.

@absoup
Last active July 21, 2022 12:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save absoup/1f2295c3d51487cd805d50b14fb54ea9 to your computer and use it in GitHub Desktop.
Save absoup/1f2295c3d51487cd805d50b14fb54ea9 to your computer and use it in GitHub Desktop.
Function to convert pandas DataFrame object into a single bulk insert SQL query (insert multiple rows at once).
import pandas as pd
import re
def df_to_sql_bulk_insert(df: pd.DataFrame, table: str, **kwargs) -> str:
"""Converts DataFrame to bulk INSERT sql query
>>> data = [(1, "_suffixnan", 1), (2, "Noneprefix", 0), (3, "fooNULLbar", 1, 2.34)]
>>> df = pd.DataFrame(data, columns=["id", "name", "is_deleted", "balance"])
>>> df
id name is_deleted balance
0 1 _suffixnan 1 NaN
1 2 Noneprefix 0 NaN
2 3 fooNULLbar 1 2.34
>>> query = df_to_sql_bulk_insert(df, "users", status="APPROVED", address=None)
>>> print(query)
INSERT INTO users (id, name, is_deleted, balance, status, address)
VALUES (1, '_suffixnan', 1, NULL, 'APPROVED', NULL),
(2, 'Noneprefix', 0, NULL, 'APPROVED', NULL),
(3, 'fooNULLbar', 1, 2.34, 'APPROVED', NULL);
"""
df = df.copy().assign(**kwargs).replace({True: 1, False: 0})
columns = ", ".join(df.columns)
tuples = map(str, df.itertuples(index=False, name=None))
values = re.sub(r"(?<=\W)(nan|None)(?=\W)", "NULL", (",\n" + " " * 7).join(tuples))
query = "INSERT INTO {} ({})\nVALUES {};"
return query.format(table, columns, values)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment