Skip to content

Instantly share code, notes, and snippets.

@hannes
Last active January 20, 2022 07:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hannes/a95a39a1eda63aeb0ca13fd82d1ba49c to your computer and use it in GitHub Desktop.
Save hannes/a95a39a1eda63aeb0ca13fd82d1ba49c to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import psycopg2
def timeit(fun, name):
import time
start_time = time.monotonic()
fun();
print(f'{name}\t', time.monotonic() - start_time)
# connect to postgres
pg_con = psycopg2.connect(dbname='db')
pg_con.set_session(autocommit=True)
pg_cur = pg_con.cursor()
mydf = pd.DataFrame({'c1' : np.random.randint(100, size=10000000)})
# easy but incredibly slow
import sqlalchemy
pg_engine = sqlalchemy.create_engine("postgresql:///db")
def pandas_to_postgres_to_sql():
mydf.to_sql("df", pg_engine)
# harder but somewhat faster
def pandas_to_postgres_csv():
import tempfile
pg_cur.execute("CREATE TABLE df (c1 INTEGER)")
csv_file = tempfile.mkstemp()[1]
mydf.to_csv(csv_file, header=False, index=False)
pg_cur.execute(f"COPY df FROM '{csv_file}'" )
pg_cur.execute("DROP TABLE IF EXISTS df")
timeit(pandas_to_postgres_to_sql, "Pandas to PostgreSQL using to_sql")
pg_cur.execute("DROP TABLE IF EXISTS df")
timeit(pandas_to_postgres_csv, "Pandas to PostgreSQL using temporary CSV file")
import sqlite3
sqlite_con = sqlite3.connect(":memory:")
def pandas_to_sqlite_to_sql():
mydf.to_sql("df", sqlite_con)
timeit(pandas_to_sqlite_to_sql, "Pandas to SQLite using to_sql")
import duckdb
duckdb_con = duckdb.connect(":memory:")
def pandas_to_duckdb():
duckdb_con.from_df(mydf).create('df')
timeit(pandas_to_duckdb, "Pandas to DuckDB")
def postgres_to_pandas():
pd.read_sql_table('df', 'postgresql:///db')
def postgres_to_pandas_query():
pd.read_sql_query('SELECT * FROM df', 'postgresql:///db')
def postgres_to_pandas_csv():
import tempfile
csv_file = tempfile.mkstemp()[1]
pg_cur.execute(f"COPY df TO '{csv_file}'")
pd.read_csv(csv_file)
timeit(postgres_to_pandas, "PostgreSQL to Pandas")
timeit(postgres_to_pandas_query, "PostgreSQL to Pandas with Query")
timeit(postgres_to_pandas_csv, "PostgreSQL to Pandas using temporary CSV file")
def sqlite_to_pandas():
pd.read_sql_query('SELECT * FROM df', sqlite_con)
timeit(sqlite_to_pandas, "SQLite to Pandas")
def duckdb_to_pandas():
duckdb_con.table('df').to_df()
timeit(duckdb_to_pandas, "DuckDB to Pandas")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment