Skip to content

Instantly share code, notes, and snippets.

@idlesign
Last active December 14, 2015 10:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save idlesign/5070908 to your computer and use it in GitHub Desktop.
Save idlesign/5070908 to your computer and use it in GitHub Desktop.
Reveal NULL in SQL evil or not quite. Naive test %) PgSQL. Python 2.7
import random
import uuid
from contextlib import contextmanager
from collections import Counter
import psycopg2
DB_NAME = 'bench'
DB_USER = 'postgres'
DB_PASSWORD = 'fhutynev1'
RANDOM_ENTRIES_COUNT = 300
WITH_NULL = 'with_null'
WITHOUT_NULL = 'without_null'
DB_TABLES = {
WITH_NULL: {
'init': '''
DROP TABLE IF EXISTS parent_w;
CREATE TABLE parent_w
(
uid character varying NOT NULL,
ts_modified integer,
parent_uid character varying,
CONSTRAINT pri_parentw_uid PRIMARY KEY (uid )
);
DROP INDEX IF EXISTS idx_parentw_uid;
CREATE INDEX idx_parentw_uid ON parent_w USING btree (parent_uid);
''',
'insert': "INSERT INTO parent_w VALUES ('%(uid)s', %(time)s, '%(pid)s');"
},
WITHOUT_NULL: {
'init': '''
DROP TABLE IF EXISTS parent_wo;
CREATE TABLE parent_wo
(
uid character varying NOT NULL,
ts_modified integer,
CONSTRAINT pri_parentwo_uid PRIMARY KEY (uid )
);
-- Links table below.
DROP TABLE IF EXISTS parent_wo_links;
CREATE TABLE parent_wo_links
(
uid character varying NOT NULL,
child_uid character varying NOT NULL,
ts_modified integer
);
DROP INDEX IF EXISTS idx_parentwo_parent_uid;
CREATE INDEX idx_parentwo_parent_uid ON parent_wo_links USING btree (uid);
DROP INDEX IF EXISTS idx_parentwo_child_uid;
CREATE INDEX idx_parentwo_child_uid ON parent_wo_links USING btree (child_uid);
''',
'insert': '''
INSERT INTO parent_wo VALUES ('%(uid)s', %(time)s);
INSERT INTO parent_wo_links VALUES ('%(pid)s', '%(uid)s', %(time)s);
'''
}
}
@contextmanager
def db_context():
connection = psycopg2.connect('dbname=%s user=%s password=%s' % (DB_NAME, DB_USER, DB_PASSWORD))
cur = connection.cursor()
yield cur
cur.close()
connection.close()
def init_tables(cursor):
for name, instructions in DB_TABLES.items():
cursor.execute(instructions['init'])
def prepare_insert_sql(sql, entry_data):
insert_sql = sql % entry_data
insert_sql = insert_sql.replace("'None'", 'NULL')
if '(NULL' in insert_sql:
insert_sql = '%s;' % insert_sql.split(';')[0]
return insert_sql
def generate_entry_data(uids_known=None):
if uids_known is None:
uids_known = [None]
entry_id = str(uuid.uuid4())
parent_id = random.choice(uids_known)
time = random.randint(100000000, 999999999)
return {'uid': entry_id, 'pid': parent_id, 'time': time}
def seed_data(cursor):
uids_known = [None]
uids_with_children = []
uids_with_parents = []
entries = []
for entry_counter in range(RANDOM_ENTRIES_COUNT):
entry_data = generate_entry_data(uids_known)
if entry_data['pid'] is not None:
uids_with_children.append(entry_data['pid'])
uids_with_parents.append(entry_data['uid'])
uids_known.append(entry_data['uid'])
entries.append(entry_data)
for name, instructions in DB_TABLES.items():
for entry in entries:
insert_sql = prepare_insert_sql(instructions['insert'], entry)
cursor.execute(insert_sql)
return uids_known, uids_with_children, uids_with_parents
def explain_sql(cursor, sql):
timing = 0
for sql_part in sql.split(';'):
sql_part = sql_part.strip(' \n')
if sql_part:
explained_sql = '\nEXPLAIN ANALYZE %s' % sql_part
cursor.execute(explained_sql)
result = cursor.fetchall()
lines = [item[0] for item in result]
timed = float(lines[-1].replace('Total runtime: ', '').replace(' ms', ''))
print(' %s --> %s' % (explained_sql, timed))
print('\n'.join(lines))
timing += timed
return timing
def compare_sql(cursor, title, sql_rule_one, sql_rule_two):
print('=====' * 20)
print('\n%s\n' % title)
time_one = explain_sql(cursor, sql_rule_one[1])
print('\nVS\n')
time_two = explain_sql(cursor, sql_rule_two[1])
calc_gain_percent = lambda winner, looser: 100 - round(winner * 100 / looser)
if time_one < time_two:
winner = sql_rule_one[0]
vs_time = time_one, time_two, calc_gain_percent(time_one, time_two)
else:
winner = sql_rule_two[0]
vs_time = time_two, time_one, calc_gain_percent(time_two, time_one)
timimg_str = '(%s ms VS %s ms) - %s%% faster' % vs_time
print('\nThe winner is `%s` %s' % (winner, timimg_str))
return winner
def print_final_results(winners):
results = Counter(winners)
print('\n\n=====\n\nThe final result is %s\n' % dict(results))
if results[WITH_NULL] == results[WITHOUT_NULL]:
print('DRAW')
else:
if results[WITH_NULL] > results[WITHOUT_NULL]:
winner = WITH_NULL
else:
winner = WITHOUT_NULL
print('THE WINNER IS `%s`' % winner)
if __name__ == '__main__':
with db_context() as cursor:
init_tables(cursor)
uids_known, uids_with_children, uids_with_parents = seed_data(cursor)
last_known = uids_known[-1]
first_with_children = uids_with_children[0]
new_entry_data = generate_entry_data()
winners = []
winners.append(compare_sql(cursor, 'GET ROOT ITEMS',
[WITH_NULL, "SELECT * FROM parent_w WHERE parent_uid IS NULL;"],
[WITHOUT_NULL, "SELECT * FROM parent_wo AS p WHERE p.uid NOT IN (SELECT pl.child_uid FROM parent_wo_links AS pl);"]))
parent_uid = first_with_children
winners.append(compare_sql(cursor, 'GET CHILDREN OF %s NESTED SELECT' % parent_uid,
[WITH_NULL, "SELECT * FROM parent_w WHERE parent_uid='%s';" % parent_uid],
[WITHOUT_NULL, "SELECT * FROM parent_wo AS p WHERE p.uid IN (SELECT pl.child_uid FROM parent_wo_links AS pl WHERE pl.uid='%s');" % parent_uid]))
parent_uid = random.choice(uids_with_children)
winners.append(compare_sql(cursor, 'GET CHILDREN OF %s OUTER JOIN' % parent_uid,
[WITH_NULL, "SELECT * FROM parent_w WHERE parent_uid='%s';" % parent_uid],
[WITHOUT_NULL, "SELECT * FROM parent_wo AS p LEFT OUTER JOIN parent_wo_links AS pl ON (pl.uid=p.uid) WHERE p.uid='%s';" % parent_uid]))
new_entry_data['pid'] = last_known
winners.append(compare_sql(cursor, 'INSERT',
[WITH_NULL, prepare_insert_sql(DB_TABLES[WITH_NULL]['insert'], new_entry_data)],
[WITHOUT_NULL, prepare_insert_sql(DB_TABLES[WITHOUT_NULL]['insert'], new_entry_data)]))
current_uid = last_known
new_parent_uid = first_with_children
new_time = new_entry_data['time']
winners.append(compare_sql(cursor, 'UPDATE PARENT',
[WITH_NULL, "UPDATE parent_w SET parent_uid='%s', ts_modified=%s WHERE uid='%s';" % (new_parent_uid, new_time, current_uid)],
[WITHOUT_NULL, '''
UPDATE parent_wo SET ts_modified=%(time)s WHERE uid='%(current_uid)s';
DELETE FROM parent_wo_links WHERE child_uid='%(current_uid)s';
INSERT INTO parent_wo_links VALUES ('%(parent)s', '%(current_uid)s', %(time)s);''' % {'parent': new_parent_uid, 'current_uid': current_uid, 'time': new_time}
]))
print_final_results(winners)
@idlesign
Copy link
Author

idlesign commented Mar 2, 2013

Example results:


GET ROOT ITEMS

   EXPLAIN ANALYZE SELECT * FROM parent_w WHERE parent_uid IS NULL --> 0.04

   EXPLAIN ANALYZE SELECT * FROM parent_wo AS p WHERE p.uid NOT IN (SELECT pl.child_uid FROM parent_wo_links AS pl) --> 0.755

The winner is `with_null` (0.04 ms VS 0.755 ms) - 95.0% faster
====================================================================================================

GET CHILDREN OF 9896ff2b-173e-43c6-a5d3-c373ba25ea69

   EXPLAIN ANALYZE SELECT * FROM parent_w WHERE parent_uid='9896ff2b-173e-43c6-a5d3-c373ba25ea69' --> 0.077

   EXPLAIN ANALYZE SELECT * FROM parent_wo AS p WHERE p.uid IN (SELECT pl.child_uid FROM parent_wo_links AS pl WHERE pl.uid='9896ff2b-173e-43c6-a5d3-c373ba25ea69') --> 0.307

The winner is `with_null` (0.077 ms VS 0.307 ms) - 75.0% faster
====================================================================================================

INSERT

   EXPLAIN ANALYZE INSERT INTO parent_w VALUES ('aff31bf7-cac9-4cfb-bf85-c1dc3706a2ff', 252947302, '73daba3c-c6aa-44a0-96bf-94cc5b51558d') --> 0.064

   EXPLAIN ANALYZE INSERT INTO parent_wo VALUES ('aff31bf7-cac9-4cfb-bf85-c1dc3706a2ff', 252947302) --> 0.035
   EXPLAIN ANALYZE INSERT INTO parent_wo_links VALUES ('73daba3c-c6aa-44a0-96bf-94cc5b51558d', 'aff31bf7-cac9-4cfb-bf85-c1dc3706a2ff', 252947302) --> 0.045

The winner is `with_null` (0.064 ms VS 0.08 ms) - 20.0% faster
====================================================================================================

UPDATE PARENT

   EXPLAIN ANALYZE UPDATE parent_w SET parent_uid='9896ff2b-173e-43c6-a5d3-c373ba25ea69', ts_modified=252947302 WHERE uid='73daba3c-c6aa-44a0-96bf-94cc5b51558d' --> 0.127

   EXPLAIN ANALYZE UPDATE parent_wo SET ts_modified=252947302 WHERE uid='73daba3c-c6aa-44a0-96bf-94cc5b51558d' --> 0.057
   EXPLAIN ANALYZE DELETE FROM parent_wo_links WHERE child_uid='73daba3c-c6aa-44a0-96bf-94cc5b51558d' --> 0.053
   EXPLAIN ANALYZE INSERT INTO parent_wo_links VALUES ('9896ff2b-173e-43c6-a5d3-c373ba25ea69', '73daba3c-c6aa-44a0-96bf-94cc5b51558d', 252947302) --> 0.08

The winner is `with_null` (0.127 ms VS 0.19 ms) - 33.0% faster


=====

The final result is {'with_null': 4}

THE WINNER IS `with_null`

@idlesign
Copy link
Author

idlesign commented Mar 3, 2013

Outer join instead of select nesting.

GET CHILDREN OF cf2e0aa6-ec22-4cf4-8272-9891e531f661 OUTER JOIN

   EXPLAIN ANALYZE SELECT * FROM parent_w WHERE parent_uid='cf2e0aa6-ec22-4cf4-8272-9891e531f661' --> 0.048

   EXPLAIN ANALYZE SELECT * FROM parent_wo AS p LEFT OUTER JOIN parent_wo_links AS pl ON (pl.uid=p.uid) WHERE p.uid='cf2e0aa6-ec22-4cf4-8272-9891e531f661' --> 0.077

The winner is `with_null` (0.048 ms VS 0.077 ms) - 38.0% faster

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment