@karenc
Last active December 17, 2020 14:10
#!/usr/bin/env python
import functools
import os
import sqlite3
import time
import psycopg2
import sqlalchemy


def setup_sqlite():
    sqlite3_conn = sqlite3.connect(sqlite_db)
    sqlite3_cursor = sqlite3_conn.cursor()
    sqlite3_cursor.execute(
        "select tbl_name from sqlite_master where tbl_name = ? and type = 'table'",
        (table_name,))
    if sqlite3_cursor.fetchall():
        sqlite3_cursor.execute(f'select count(*) from {table_name}')
        count = sqlite3_cursor.fetchone()[0]
        print(f'Use existing table {table_name} ({count} rows) in sqlite database')
    else:
        print(f'Create new table {table_name} in sqlite database')
        created_table['sqlite'] = True
        sqlite3_cursor.execute(
            f'create table {table_name} (annot_rowid integer primary key, image_rowid integer not null)')
        for i in range(queries):
            sqlite3_cursor.execute(
                f'insert into {table_name} (annot_rowid, image_rowid) values (?, ?)',
                (i, i * 10))
        # Commit so the inserted rows are not rolled back when the connection closes
        sqlite3_conn.commit()
    sqlite3_conn.close()


def setup_postgres():
    with psycopg2.connect(postgres_db) as conn:
        with conn.cursor() as cursor:
            cursor.execute(
                'select table_name from information_schema.tables where table_name = %s and table_schema = %s',
                (table_name, postgres_schema)
            )
            if cursor.fetchall():
                cursor.execute(f'select count(*) from {table_name}')
                count = cursor.fetchone()[0]
                print(f'Use existing table {table_name} ({count} rows) in postgres database')
            else:
                created_table['postgres'] = True
                print(f'Create new table {table_name} in postgres database')
                cursor.execute(
                    f'create table {table_name} (annot_rowid integer primary key, image_rowid integer not null)')
                for i in range(queries):
                    cursor.execute(
                        f'insert into {table_name} (annot_rowid, image_rowid) values (%s, %s)',
                        (i, i * 10))


def clean_up():
    if created_table['sqlite']:
        print(f'Drop table {table_name} in sqlite')
        sqlite_conn = sqlite3.connect(sqlite_db)
        sqlite_cursor = sqlite_conn.cursor()
        sqlite_cursor.execute(f'drop table {table_name}')
        sqlite_conn.commit()
        sqlite_conn.close()
    if created_table['postgres']:
        print(f'Drop table {table_name} in postgres')
        with psycopg2.connect(postgres_db) as conn:
            with conn.cursor() as cursor:
                cursor.execute(f'drop table {postgres_schema}.{table_name}')
    print('---')


def average_time(func):
    # Decorator: run the wrapped benchmark `iterations` times and print the mean wall-clock time
    times = []

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        for i in range(iterations):
            start = time.time()
            func(*args, **kwargs)
            times.append(time.time() - start)
        print(f'Average time for running {func}: {sum(times) / iterations}')
    return wrapper


# Baseline: one query per rowid through the raw DBAPI drivers
@average_time
def run_sqlite():
    stmt = f'select image_rowid from {table_name} where annot_rowid = ?'
    for rowid in range(queries):
        sqlite3_cursor.execute(stmt, (rowid,))
        sqlite3_cursor.fetchall()


@average_time
def run_psycopg2():
    stmt = f'select image_rowid from {table_name} where annot_rowid = %s'
    for rowid in range(queries):
        psycopg2_cursor.execute(stmt, (rowid,))
        psycopg2_cursor.fetchall()


@average_time
def run_sqlalchemy_sqlite():
    stmt = f'select image_rowid from {table_name} where annot_rowid = ?'
    for rowid in range(queries):
        sqlalchemy_conn['sqlite'].execute(stmt, (rowid,)).fetchall()


@average_time
def run_sqlalchemy_postgres():
    stmt = f'select image_rowid from {table_name} where annot_rowid = %s'
    for rowid in range(queries):
        sqlalchemy_conn['postgres'].execute(stmt, (rowid,)).fetchall()


@average_time
def run_sqlalchemy_text_sqlite():
    stmt = sqlalchemy.sql.text(f'select image_rowid from {table_name} where annot_rowid = :rowid')
    for rowid in range(queries):
        sqlalchemy_conn['sqlite'].execute(stmt, rowid=rowid).fetchall()


@average_time
def run_sqlalchemy_text_postgres():
    stmt = sqlalchemy.sql.text(f'select image_rowid from {table_name} where annot_rowid = :rowid')
    for rowid in range(queries):
        sqlalchemy_conn['postgres'].execute(stmt, rowid=rowid).fetchall()


@average_time
def run_sqlalchemy_table_text_sqlite():
    annot_rowid = annotations['sqlite'].c['annot_rowid']
    image_rowid = annotations['sqlite'].c['image_rowid']
    stmt = sqlalchemy.select([image_rowid]).\
        where(annot_rowid == sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type))
    stmt = sqlalchemy.sql.text(str(stmt))
    for rowid in range(queries):
        sqlalchemy_conn['sqlite'].execute(stmt, rowid=rowid).fetchall()


@average_time
def run_sqlalchemy_table_text_postgres():
    annot_rowid = annotations['postgres'].c['annot_rowid']
    image_rowid = annotations['postgres'].c['image_rowid']
    stmt = sqlalchemy.select([image_rowid]).\
        where(annot_rowid == sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type))
    stmt = sqlalchemy.sql.text(str(stmt))
    for rowid in range(queries):
        sqlalchemy_conn['postgres'].execute(stmt, rowid=rowid).fetchall()


@average_time
def run_sqlalchemy_table_sqlite():
    annot_rowid = annotations['sqlite'].c['annot_rowid']
    image_rowid = annotations['sqlite'].c['image_rowid']
    stmt = sqlalchemy.select([image_rowid]).\
        where(annot_rowid == sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type))
    for rowid in range(queries):
        sqlalchemy_conn['sqlite'].execute(stmt, rowid=rowid).fetchall()


@average_time
def run_sqlalchemy_table_postgres():
    annot_rowid = annotations['postgres'].c['annot_rowid']
    image_rowid = annotations['postgres'].c['image_rowid']
    stmt = sqlalchemy.select([image_rowid]).\
        where(annot_rowid == sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type))
    for rowid in range(queries):
        sqlalchemy_conn['postgres'].execute(stmt, rowid=rowid).fetchall()


# Fetch all rowids in one (batched) SELECT using an expanding IN bind parameter
@average_time
def run_sqlalchemy_reduced_queries_sqlite():
    annot_rowid = annotations['sqlite'].c['annot_rowid']
    image_rowid = annotations['sqlite'].c['image_rowid']
    params_iter = list(range(queries))
    result_map = {}
    BATCH_SIZE = 250000
    stmt = sqlalchemy.select([annot_rowid, image_rowid])
    where_clause = annot_rowid.in_(sqlalchemy.sql.bindparam('value', expanding=True))
    stmt = stmt.where(where_clause)
    for batch in range(queries // BATCH_SIZE + 1):
        val_list = sqlalchemy_conn['sqlite'].execute(stmt, value=params_iter[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE])
        for val in val_list:
            existing = result_map.setdefault(val[0], [])
            if val[1:] not in existing:
                existing.append(val[1:])


@average_time
def run_sqlalchemy_reduced_queries_postgres():
    annot_rowid = annotations['postgres'].c['annot_rowid']
    image_rowid = annotations['postgres'].c['image_rowid']
    params_iter = list(range(queries))
    result_map = {}
    stmt = sqlalchemy.select([annot_rowid, image_rowid])
    where_clause = annot_rowid.in_(sqlalchemy.sql.bindparam('value', expanding=True))
    stmt = stmt.where(where_clause)
    val_list = sqlalchemy_conn['postgres'].execute(stmt, value=params_iter)
    for val in val_list:
        existing = result_map.setdefault(val[0], [])
        if val[1:] not in existing:
            existing.append(val[1:])


# Experimental: https://docs.sqlalchemy.org/en/14/core/connections.html#sql-compilation-caching
# Couldn't get it to be quick
# @average_time
# def run_sqlalchemy_table_lambda():
#     annot_rowid = annotations.c['image_rowid']
#     for rowid in range(queries):
#         stmt = sqlalchemy.lambda_stmt(lambda: sqlalchemy.select([annot_rowid]))
#         stmt += lambda s: s.where(annot_rowid == rowid)  # sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type))
#         sqlalchemy_conn.execute(stmt)


if __name__ == '__main__':
    sqlite_db = os.getenv('SQLITE_DB', 'benchmark.sqlite3')
    postgres_db = os.getenv('POSTGRES_DB', 'postgresql://wbia:wbia@db/testdb1')
    postgres_schema = os.getenv('POSTGRES_SCHEMA', 'public')
    queries = int(os.getenv('QUERIES', 10000))
    iterations = int(os.getenv('ITERATIONS', 10))
    table_name = os.getenv('TABLE_NAME', 'annotations')
    print(f'''\
SQLITE_DB={sqlite_db}
POSTGRES_DB={postgres_db}
POSTGRES_SCHEMA={postgres_schema}
QUERIES={queries}
ITERATIONS={iterations}
TABLE_NAME={table_name}
sqlalchemy_version={sqlalchemy.__version__}
''')
    engine = {
        'sqlite': sqlalchemy.create_engine(f'sqlite:///{sqlite_db}'),
        'postgres': sqlalchemy.create_engine(postgres_db),
    }
    sqlalchemy_conn = {
        'sqlite': engine['sqlite'].connect(),
        'postgres': engine['postgres'].connect(),
    }
    created_table = {
        'sqlite': None,
        'postgres': None,
    }
    setup_sqlite()
    sqlite3_conn = sqlite3.connect(sqlite_db)
    sqlite3_cursor = sqlite3_conn.cursor()
    setup_postgres()
    psycopg2_conn = psycopg2.connect(postgres_db)
    psycopg2_cursor = psycopg2_conn.cursor()
    psycopg2_cursor.execute('set schema %s', (postgres_schema,))
    sqlalchemy_md = {
        'sqlite': sqlalchemy.MetaData(),
        'postgres': sqlalchemy.MetaData(),
    }
    sqlalchemy_md['sqlite'].reflect(bind=engine['sqlite'])
    sqlalchemy_md['postgres'].reflect(bind=engine['postgres'],
                                      schema=postgres_schema)
    annotations = {
        'sqlite': sqlalchemy_md['sqlite'].tables[table_name],
        'postgres': sqlalchemy_md['postgres'].tables[
            f'{postgres_schema}.{table_name}'],
    }
    try:
        run_sqlite()
        run_psycopg2()
        run_sqlalchemy_sqlite()
        run_sqlalchemy_postgres()
        run_sqlalchemy_text_sqlite()
        run_sqlalchemy_text_postgres()
        run_sqlalchemy_table_text_sqlite()
        run_sqlalchemy_table_text_postgres()
        run_sqlalchemy_table_sqlite()
        run_sqlalchemy_table_postgres()
        run_sqlalchemy_reduced_queries_sqlite()
        run_sqlalchemy_reduced_queries_postgres()
    finally:
        # Close the sqlalchemy connections instead of just dropping the references
        for conn in sqlalchemy_conn.values():
            conn.close()
        sqlite3_conn.close()
        psycopg2_cursor.close()
        psycopg2_conn.close()
        clean_up()

karenc commented Dec 17, 2020

Using SQLAlchemy version 1.4.0b1, the average time in seconds to run 10000 queries with each approach:

Raw sqlite3 / psycopg2: SQLite 0.049, Postgres 0.681
import sqlite3
conn = sqlite3.connect(sqlite_db)
cursor = conn.cursor()
for rowid in range(10000):
    cursor.execute('SELECT image_rowid FROM annotations WHERE annot_rowid = ?', (rowid,))
    cursor.fetchall()
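
The Postgres time in the same row comes from the equivalent psycopg2 loop (run_psycopg2 in the script above); the DSN below is just the script's default and stands in for a real database:

import psycopg2
conn = psycopg2.connect('postgresql://wbia:wbia@db/testdb1')  # placeholder DSN, the script's default
cursor = conn.cursor()
for rowid in range(10000):
    cursor.execute('SELECT image_rowid FROM annotations WHERE annot_rowid = %s', (rowid,))
    cursor.fetchall()
cursor.close()
conn.close()
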
SQLAlchemy with dialect-specific SQL: SQLite 0.458, Postgres 1.216
import sqlalchemy
engine = sqlalchemy.create_engine(sqlite_db_uri)
sqlalchemy_conn = engine.connect()
for rowid in range(10000):
    sqlalchemy_conn.execute(
        'SELECT image_rowid FROM annotations WHERE annot_rowid = ?',
        (rowid,)
    ).fetchall()
SQLAlchemy with a backend-neutral text clause: SQLite 0.694, Postgres 1.534
stmt = sqlalchemy.sql.text(
    'SELECT image_rowid FROM annotations WHERE annot_rowid = :rowid')
for rowid in range(10000):
    sqlalchemy_conn.execute(stmt, rowid=rowid).fetchall()
SQLAlchemy with reflected tables: SQLite 0.643, Postgres 1.520
metadata = sqlalchemy.MetaData()
metadata.reflect(bind=engine)
annotations = metadata.tables['annotations']
annot_rowid = annotations.c['annot_rowid']
image_rowid = annotations.c['image_rowid']
stmt = sqlalchemy.select([image_rowid]).where(
    annot_rowid == sqlalchemy.sql.bindparam('rowid', type_=annot_rowid.type))
for rowid in range(10000):
    sqlalchemy_conn.execute(stmt, rowid=rowid).fetchall()
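
The Postgres figure in that row is measured the same way; the only difference (as in the script above) is that reflection needs the schema, which also changes the key used to look up the table:

import sqlalchemy
engine = sqlalchemy.create_engine('postgresql://wbia:wbia@db/testdb1')  # placeholder DSN, the script's default
metadata = sqlalchemy.MetaData()
metadata.reflect(bind=engine, schema='public')
annotations = metadata.tables['public.annotations']  # reflected Postgres tables are keyed '<schema>.<table>'
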
SQLAlchemy with reduced queries: SQLite 0.013, Postgres 0.051
stmt = sqlalchemy.select([annot_rowid, image_rowid]).where(
    annot_rowid.in_(sqlalchemy.sql.bindparam('value', expanding=True)))
val_list = sqlalchemy_conn.execute(stmt, value=list(range(10000)))
result_map = {}
for val in val_list:
    result_map.setdefault(val[0], set()).add(val[1:])
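
For completeness, here is a self-contained sketch of the reduced-queries approach against an in-memory SQLite database, using the same 1.4-era API as above (100 rows instead of 10000 to keep it small; table and column names match the benchmark script):

import sqlalchemy

engine = sqlalchemy.create_engine('sqlite://')  # in-memory stand-in for the benchmark database
metadata = sqlalchemy.MetaData()
annotations = sqlalchemy.Table(
    'annotations', metadata,
    sqlalchemy.Column('annot_rowid', sqlalchemy.Integer, primary_key=True),
    sqlalchemy.Column('image_rowid', sqlalchemy.Integer, nullable=False),
)
metadata.create_all(engine)
conn = engine.connect()
conn.execute(annotations.insert(), [
    {'annot_rowid': i, 'image_rowid': i * 10} for i in range(100)])

# One round trip: WHERE annot_rowid IN (...) via an expanding bind parameter,
# instead of one SELECT per rowid
annot_rowid = annotations.c['annot_rowid']
image_rowid = annotations.c['image_rowid']
stmt = sqlalchemy.select([annot_rowid, image_rowid]).where(
    annot_rowid.in_(sqlalchemy.sql.bindparam('value', expanding=True)))
result_map = {}
for val in conn.execute(stmt, value=list(range(100))):
    result_map.setdefault(val[0], set()).add(val[1:])
print(len(result_map))  # 100
conn.close()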
