-
-
Save hannes/a95a39a1eda63aeb0ca13fd82d1ba49c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import psycopg2 | |
def timeit(fun, name): | |
import time | |
start_time = time.monotonic() | |
fun(); | |
print(f'{name}\t', time.monotonic() - start_time) | |
# connect to postgres | |
pg_con = psycopg2.connect(dbname='db') | |
pg_con.set_session(autocommit=True) | |
pg_cur = pg_con.cursor() | |
mydf = pd.DataFrame({'c1' : np.random.randint(100, size=10000000)}) | |
# easy but incredibly slow | |
import sqlalchemy | |
pg_engine = sqlalchemy.create_engine("postgresql:///db") | |
def pandas_to_postgres_to_sql(): | |
mydf.to_sql("df", pg_engine) | |
# harder but somewhat faster | |
def pandas_to_postgres_csv(): | |
import tempfile | |
pg_cur.execute("CREATE TABLE df (c1 INTEGER)") | |
csv_file = tempfile.mkstemp()[1] | |
mydf.to_csv(csv_file, header=False, index=False) | |
pg_cur.execute(f"COPY df FROM '{csv_file}'" ) | |
pg_cur.execute("DROP TABLE IF EXISTS df") | |
timeit(pandas_to_postgres_to_sql, "Pandas to PostgreSQL using to_sql") | |
pg_cur.execute("DROP TABLE IF EXISTS df") | |
timeit(pandas_to_postgres_csv, "Pandas to PostgreSQL using temporary CSV file") | |
import sqlite3 | |
sqlite_con = sqlite3.connect(":memory:") | |
def pandas_to_sqlite_to_sql(): | |
mydf.to_sql("df", sqlite_con) | |
timeit(pandas_to_sqlite_to_sql, "Pandas to SQLite using to_sql") | |
import duckdb | |
duckdb_con = duckdb.connect(":memory:") | |
def pandas_to_duckdb(): | |
duckdb_con.from_df(mydf).create('df') | |
timeit(pandas_to_duckdb, "Pandas to DuckDB") | |
def postgres_to_pandas(): | |
pd.read_sql_table('df', 'postgresql:///db') | |
def postgres_to_pandas_query(): | |
pd.read_sql_query('SELECT * FROM df', 'postgresql:///db') | |
def postgres_to_pandas_csv(): | |
import tempfile | |
csv_file = tempfile.mkstemp()[1] | |
pg_cur.execute(f"COPY df TO '{csv_file}'") | |
pd.read_csv(csv_file) | |
timeit(postgres_to_pandas, "PostgreSQL to Pandas") | |
timeit(postgres_to_pandas_query, "PostgreSQL to Pandas with Query") | |
timeit(postgres_to_pandas_csv, "PostgreSQL to Pandas using temporary CSV file") | |
def sqlite_to_pandas(): | |
pd.read_sql_query('SELECT * FROM df', sqlite_con) | |
timeit(sqlite_to_pandas, "SQLite to Pandas") | |
def duckdb_to_pandas(): | |
duckdb_con.table('df').to_df() | |
timeit(duckdb_to_pandas, "DuckDB to Pandas") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment