Skip to content

Instantly share code, notes, and snippets.

@jeromer
Created September 16, 2016 08:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeromer/a7e6ab89be76209e0f9faf9520d5db4b to your computer and use it in GitHub Desktop.
Save jeromer/a7e6ab89be76209e0f9faf9520d5db4b to your computer and use it in GitHub Desktop.
#! /usr/bin/env python
# Tested on Postgres 9.5
import psycopg2
import datetime
conn = psycopg2.connect("dbname=sandbox user=postgres")
cursor = conn.cursor()
TABLE = "test_bulk_update"
ROWS_TO_LOAD = int(1e6)
SEP = "-" * 50
START = 0
END = ROWS_TO_LOAD - 100
def main():
create_test_table()
fill_table()
update_method_a()
update_method_b()
update_method_c()
update_method_d()
drop_test_table()
def create_test_table():
cursor.execute("""
CREATE TABLE %s(
id SERIAL NOT NULL,
text text,
CONSTRAINT test_bulk_update_pkey PRIMARY KEY (id)
);
""" % TABLE)
conn.commit()
def fill_table():
sql = """
INSERT INTO %s (text) VALUES (generate_series(1, %d));
""" % (TABLE, ROWS_TO_LOAD)
cursor.execute(sql)
conn.commit()
sql = "SELECT COUNT(*) as total FROM %s" % TABLE
cursor.execute(sql)
total = cursor.fetchall()[0][0]
print "Testing with %d rows" % (total)
def update_method_a():
print SEP
print "Method A (one commit per update)... zzzZZZZzZZZZZzZZZ"
start = datetime.datetime.now()
for i in range(START, END):
sql = "UPDATE " + TABLE + " SET text = 'a' WHERE id = %s"
cursor.execute(sql, [i])
conn.commit()
end = datetime.datetime.now()
print "Done : %s" % (end - start)
def update_method_b():
print SEP
print "Method B (one global commit)"
start = datetime.datetime.now()
for i in range(START, END):
sql = "UPDATE " + TABLE + " SET text = 'b' WHERE id = %s"
cursor.execute(sql, [i])
conn.commit()
end = datetime.datetime.now()
print "Done : %s" % (end - start)
def update_method_c():
print SEP
print "Method C (Using IN)"
start = datetime.datetime.now()
sql = """
UPDATE %s SET text = 'c'
WHERE id IN (SELECT id FROM %s)
""" % (TABLE, TABLE)
cursor.execute(sql)
conn.commit()
end = datetime.datetime.now()
print "Done : %s" % (end - start)
def update_method_d():
print SEP
print "Method D (Using FROM source in UPDATE)"
start = datetime.datetime.now()
sql = "CREATE TEMP TABLE tmp_table(LIKE %s INCLUDING ALL);" % TABLE
cursor.execute(sql)
conn.commit()
sql = """
INSERT INTO tmp_table SELECT * FROM %s WHERE id BETWEEN %s AND %s
""" % (TABLE, START, END)
cursor.execute(sql)
conn.commit()
sql = """
UPDATE %s t1
SET text = 'd'
FROM tmp_table t2
WHERE t1.id = t2.id
""" % TABLE
cursor.execute(sql)
conn.commit()
sql = "DROP TABLE tmp_table"
cursor.execute(sql)
conn.commit()
end = datetime.datetime.now()
print "Done : %s" % (end - start)
def drop_test_table():
sql = "DROP TABLE %s" % TABLE
cursor.execute(sql)
conn.commit()
if __name__ == '__main__':
main()
# Testing with 10000 rows
# --------------------------------------------------
# Method A (one commit per update)... zzzZZZZzZZZZZzZZZ
# Done : 0:00:36.887775
# --------------------------------------------------
# Method B (one global commit)
# Done : 0:00:00.915838
# --------------------------------------------------
# Method C (Using IN)
# Done : 0:00:00.054597
# --------------------------------------------------
# Method D (Using FROM source in UPDATE)
# Done : 0:00:00.098071
# Testing with 100000 rows
# --------------------------------------------------
# Method A (one commit per update)... zzzZZZZzZZZZZzZZZ
# Done : 0:06:38.097410
# --------------------------------------------------
# Method B (one global commit)
# Done : 0:00:08.409724
# --------------------------------------------------
# Method C (Using IN)
# Done : 0:00:00.607172
# --------------------------------------------------
# Method D (Using FROM source in UPDATE)
# Done : 0:00:00.801136
# Testing with 500000 rows (method A disabled because to slow)
# --------------------------------------------------
# Method B (one global commit)
# Done : 0:00:41.990436
# --------------------------------------------------
# Method C (Using IN)
# Done : 0:00:04.296147
# --------------------------------------------------
# Method D (Using FROM source in UPDATE)
# Done : 0:00:05.939987
# Testing with 1000000 rows (method A disabled because to slow)
# --------------------------------------------------
# Method B (one global commit)
# Done : 0:01:25.472691
# --------------------------------------------------
# Method C (Using IN)
# Done : 0:00:08.467847
# --------------------------------------------------
# Method D (Using FROM source in UPDATE)
# Done : 0:00:12.588521
# (This is actually divided in ~6 seconds to create the
# temporary table and ~ 6 seconds to apply the update)
# Maybe tweaking temp_buffer could help
# https://www.postgresql.org/docs/current/static/runtime-config-resource.html
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment