Last active
December 14, 2015 10:19
-
-
Save idlesign/5070908 to your computer and use it in GitHub Desktop.
Reveal NULL in SQL evil or not quite. Naive test %) PgSQL. Python 2.7
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import uuid | |
from contextlib import contextmanager | |
from collections import Counter | |
import psycopg2 | |
DB_NAME = 'bench' | |
DB_USER = 'postgres' | |
DB_PASSWORD = 'fhutynev1' | |
RANDOM_ENTRIES_COUNT = 300 | |
WITH_NULL = 'with_null' | |
WITHOUT_NULL = 'without_null' | |
DB_TABLES = { | |
WITH_NULL: { | |
'init': ''' | |
DROP TABLE IF EXISTS parent_w; | |
CREATE TABLE parent_w | |
( | |
uid character varying NOT NULL, | |
ts_modified integer, | |
parent_uid character varying, | |
CONSTRAINT pri_parentw_uid PRIMARY KEY (uid ) | |
); | |
DROP INDEX IF EXISTS idx_parentw_uid; | |
CREATE INDEX idx_parentw_uid ON parent_w USING btree (parent_uid); | |
''', | |
'insert': "INSERT INTO parent_w VALUES ('%(uid)s', %(time)s, '%(pid)s');" | |
}, | |
WITHOUT_NULL: { | |
'init': ''' | |
DROP TABLE IF EXISTS parent_wo; | |
CREATE TABLE parent_wo | |
( | |
uid character varying NOT NULL, | |
ts_modified integer, | |
CONSTRAINT pri_parentwo_uid PRIMARY KEY (uid ) | |
); | |
-- Links table below. | |
DROP TABLE IF EXISTS parent_wo_links; | |
CREATE TABLE parent_wo_links | |
( | |
uid character varying NOT NULL, | |
child_uid character varying NOT NULL, | |
ts_modified integer | |
); | |
DROP INDEX IF EXISTS idx_parentwo_parent_uid; | |
CREATE INDEX idx_parentwo_parent_uid ON parent_wo_links USING btree (uid); | |
DROP INDEX IF EXISTS idx_parentwo_child_uid; | |
CREATE INDEX idx_parentwo_child_uid ON parent_wo_links USING btree (child_uid); | |
''', | |
'insert': ''' | |
INSERT INTO parent_wo VALUES ('%(uid)s', %(time)s); | |
INSERT INTO parent_wo_links VALUES ('%(pid)s', '%(uid)s', %(time)s); | |
''' | |
} | |
} | |
@contextmanager | |
def db_context(): | |
connection = psycopg2.connect('dbname=%s user=%s password=%s' % (DB_NAME, DB_USER, DB_PASSWORD)) | |
cur = connection.cursor() | |
yield cur | |
cur.close() | |
connection.close() | |
def init_tables(cursor): | |
for name, instructions in DB_TABLES.items(): | |
cursor.execute(instructions['init']) | |
def prepare_insert_sql(sql, entry_data): | |
insert_sql = sql % entry_data | |
insert_sql = insert_sql.replace("'None'", 'NULL') | |
if '(NULL' in insert_sql: | |
insert_sql = '%s;' % insert_sql.split(';')[0] | |
return insert_sql | |
def generate_entry_data(uids_known=None): | |
if uids_known is None: | |
uids_known = [None] | |
entry_id = str(uuid.uuid4()) | |
parent_id = random.choice(uids_known) | |
time = random.randint(100000000, 999999999) | |
return {'uid': entry_id, 'pid': parent_id, 'time': time} | |
def seed_data(cursor): | |
uids_known = [None] | |
uids_with_children = [] | |
uids_with_parents = [] | |
entries = [] | |
for entry_counter in range(RANDOM_ENTRIES_COUNT): | |
entry_data = generate_entry_data(uids_known) | |
if entry_data['pid'] is not None: | |
uids_with_children.append(entry_data['pid']) | |
uids_with_parents.append(entry_data['uid']) | |
uids_known.append(entry_data['uid']) | |
entries.append(entry_data) | |
for name, instructions in DB_TABLES.items(): | |
for entry in entries: | |
insert_sql = prepare_insert_sql(instructions['insert'], entry) | |
cursor.execute(insert_sql) | |
return uids_known, uids_with_children, uids_with_parents | |
def explain_sql(cursor, sql): | |
timing = 0 | |
for sql_part in sql.split(';'): | |
sql_part = sql_part.strip(' \n') | |
if sql_part: | |
explained_sql = '\nEXPLAIN ANALYZE %s' % sql_part | |
cursor.execute(explained_sql) | |
result = cursor.fetchall() | |
lines = [item[0] for item in result] | |
timed = float(lines[-1].replace('Total runtime: ', '').replace(' ms', '')) | |
print(' %s --> %s' % (explained_sql, timed)) | |
print('\n'.join(lines)) | |
timing += timed | |
return timing | |
def compare_sql(cursor, title, sql_rule_one, sql_rule_two): | |
print('=====' * 20) | |
print('\n%s\n' % title) | |
time_one = explain_sql(cursor, sql_rule_one[1]) | |
print('\nVS\n') | |
time_two = explain_sql(cursor, sql_rule_two[1]) | |
calc_gain_percent = lambda winner, looser: 100 - round(winner * 100 / looser) | |
if time_one < time_two: | |
winner = sql_rule_one[0] | |
vs_time = time_one, time_two, calc_gain_percent(time_one, time_two) | |
else: | |
winner = sql_rule_two[0] | |
vs_time = time_two, time_one, calc_gain_percent(time_two, time_one) | |
timimg_str = '(%s ms VS %s ms) - %s%% faster' % vs_time | |
print('\nThe winner is `%s` %s' % (winner, timimg_str)) | |
return winner | |
def print_final_results(winners): | |
results = Counter(winners) | |
print('\n\n=====\n\nThe final result is %s\n' % dict(results)) | |
if results[WITH_NULL] == results[WITHOUT_NULL]: | |
print('DRAW') | |
else: | |
if results[WITH_NULL] > results[WITHOUT_NULL]: | |
winner = WITH_NULL | |
else: | |
winner = WITHOUT_NULL | |
print('THE WINNER IS `%s`' % winner) | |
if __name__ == '__main__': | |
with db_context() as cursor: | |
init_tables(cursor) | |
uids_known, uids_with_children, uids_with_parents = seed_data(cursor) | |
last_known = uids_known[-1] | |
first_with_children = uids_with_children[0] | |
new_entry_data = generate_entry_data() | |
winners = [] | |
winners.append(compare_sql(cursor, 'GET ROOT ITEMS', | |
[WITH_NULL, "SELECT * FROM parent_w WHERE parent_uid IS NULL;"], | |
[WITHOUT_NULL, "SELECT * FROM parent_wo AS p WHERE p.uid NOT IN (SELECT pl.child_uid FROM parent_wo_links AS pl);"])) | |
parent_uid = first_with_children | |
winners.append(compare_sql(cursor, 'GET CHILDREN OF %s NESTED SELECT' % parent_uid, | |
[WITH_NULL, "SELECT * FROM parent_w WHERE parent_uid='%s';" % parent_uid], | |
[WITHOUT_NULL, "SELECT * FROM parent_wo AS p WHERE p.uid IN (SELECT pl.child_uid FROM parent_wo_links AS pl WHERE pl.uid='%s');" % parent_uid])) | |
parent_uid = random.choice(uids_with_children) | |
winners.append(compare_sql(cursor, 'GET CHILDREN OF %s OUTER JOIN' % parent_uid, | |
[WITH_NULL, "SELECT * FROM parent_w WHERE parent_uid='%s';" % parent_uid], | |
[WITHOUT_NULL, "SELECT * FROM parent_wo AS p LEFT OUTER JOIN parent_wo_links AS pl ON (pl.uid=p.uid) WHERE p.uid='%s';" % parent_uid])) | |
new_entry_data['pid'] = last_known | |
winners.append(compare_sql(cursor, 'INSERT', | |
[WITH_NULL, prepare_insert_sql(DB_TABLES[WITH_NULL]['insert'], new_entry_data)], | |
[WITHOUT_NULL, prepare_insert_sql(DB_TABLES[WITHOUT_NULL]['insert'], new_entry_data)])) | |
current_uid = last_known | |
new_parent_uid = first_with_children | |
new_time = new_entry_data['time'] | |
winners.append(compare_sql(cursor, 'UPDATE PARENT', | |
[WITH_NULL, "UPDATE parent_w SET parent_uid='%s', ts_modified=%s WHERE uid='%s';" % (new_parent_uid, new_time, current_uid)], | |
[WITHOUT_NULL, ''' | |
UPDATE parent_wo SET ts_modified=%(time)s WHERE uid='%(current_uid)s'; | |
DELETE FROM parent_wo_links WHERE child_uid='%(current_uid)s'; | |
INSERT INTO parent_wo_links VALUES ('%(parent)s', '%(current_uid)s', %(time)s);''' % {'parent': new_parent_uid, 'current_uid': current_uid, 'time': new_time} | |
])) | |
print_final_results(winners) |
Outer join instead of select nesting.
GET CHILDREN OF cf2e0aa6-ec22-4cf4-8272-9891e531f661 OUTER JOIN
EXPLAIN ANALYZE SELECT * FROM parent_w WHERE parent_uid='cf2e0aa6-ec22-4cf4-8272-9891e531f661' --> 0.048
EXPLAIN ANALYZE SELECT * FROM parent_wo AS p LEFT OUTER JOIN parent_wo_links AS pl ON (pl.uid=p.uid) WHERE p.uid='cf2e0aa6-ec22-4cf4-8272-9891e531f661' --> 0.077
The winner is `with_null` (0.048 ms VS 0.077 ms) - 38.0% faster
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example results: