Skip to content

Instantly share code, notes, and snippets.

@simonw
Created September 10, 2023 17:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonw/271b6bcad3cb8b839c49aa44299525a5 to your computer and use it in GitHub Desktop.
Save simonw/271b6bcad3cb8b839c49aa44299525a5 to your computer and use it in GitHub Desktop.
SQLite micro-benchmark with Python threading

I got GPT-4 Code Interpreter to write most of this for me, transcript here

I had to modify it a tiny bit to get it to run, the parameters for the queries were missing.

Results on my M2 MacBook Pro:

---------------
Mode | Scenario | Update QPS | Select QPS
---------------
Normal | Combined | 1843.3 | 29.6
---------------
Normal | Update Only | 2300.3 | 0.0
---------------
Normal | Select Only | 0.0 | 205874.9
---------------
WAL | Combined | 11641.0 | 462251.8
---------------
WAL | Update Only | 9229.7 | 0.0
---------------
WAL | Select Only | 0.0 | 356455.4
---------------

I don't think this is an accurate benchmark of concurrent access though, because it's using Python threads which are subject to the GIL. So treat these numbers with suspicion.

They do seem to very much demonstrate the power of WAL mode though.

It's weird that the number of selects in WAL combined mode beats select only in that mode too. I got the same result a few times, and I don't understand why.

import sqlite3
import random
import threading
import time
import sys
DURATION = 10
def setup_database(db_path, wal_mode=False):
conn = sqlite3.connect(db_path)
if wal_mode:
conn.execute("PRAGMA journal_mode=WAL")
cursor = conn.cursor()
cursor.execute('DROP TABLE IF EXISTS counters')
cursor.execute('DROP TABLE IF EXISTS large_data')
cursor.execute('''
CREATE TABLE counters (
id INTEGER PRIMARY KEY,
value INTEGER DEFAULT 0
)
''')
for i in range(1000):
cursor.execute('INSERT INTO counters (id) VALUES (?)', (i+1,))
cursor.execute('''
CREATE TABLE large_data (
id INTEGER PRIMARY KEY,
data TEXT
)
''')
for i in range(10000):
data = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(100))
cursor.execute('INSERT INTO large_data (data) VALUES (?)', (data,))
conn.commit()
conn.close()
def benchmark(db_path, do_update, do_select):
ops_count = {"update": 0, "select": 0}
def worker(query, key):
conn_local = sqlite3.connect(db_path)
end_time = time.time() + DURATION
while time.time() < end_time:
with conn_local:
conn_local.execute(query, (random.randint(1, 1000),))
ops_count[key] += 1
conn_local.close()
if do_update:
t1 = threading.Thread(target=worker, args=('UPDATE counters SET value = value + 1 WHERE id = ?', "update"))
t1.start()
if do_select:
t2 = threading.Thread(target=worker, args=('SELECT * FROM large_data WHERE id = ?', "select"))
t2.start()
if do_update:
t1.join()
if do_select:
t2.join()
return ops_count["update"]/DURATION, ops_count["select"]/DURATION
def print_ascii_table(data):
widths = [max(map(len, str(col))) for col in zip(*data)]
total_width = sum(widths) + 3 * len(widths) - 1
format_str = ' | '.join('%%-%ds' % width for width in widths)
print('-' * total_width)
for row in data:
print(format_str % tuple(row))
print('-' * total_width)
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: sqlite_benchmark.py <database_basename>")
sys.exit(1)
base_name = sys.argv[1]
results = [("Mode", "Scenario", "Update QPS", "Select QPS")]
for mode, wal in [("Normal", False), ("WAL", True)]:
db_path = f"{base_name}-{mode.lower()}.db"
setup_database(db_path, wal_mode=wal)
results.append((mode, "Combined", *benchmark(db_path, True, True)))
results.append((mode, "Update Only", *benchmark(db_path, True, False)))
results.append((mode, "Select Only", *benchmark(db_path, False, True)))
print_ascii_table(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment