simonw/README.md

## README.md

      
    Raw
  

              README.md
            
          
    I got GPT-4 Code Interpreter to write most of this for me, transcript here
I had to modify it a tiny bit to get it to run, the parameters for the queries were missing.
Results on my M2 MacBook Pro:
---------------
Mode | Scenario | Update QPS | Select QPS
---------------
Normal | Combined | 1843.3 | 29.6
---------------
Normal | Update Only | 2300.3 | 0.0
---------------
Normal | Select Only | 0.0 | 205874.9
---------------
WAL | Combined | 11641.0 | 462251.8
---------------
WAL | Update Only | 9229.7 | 0.0
---------------
WAL | Select Only | 0.0 | 356455.4
---------------

I don't think this is an accurate benchmark of concurrent access though, because it's using Python threads which are subject to the GIL. So treat these numbers with suspicion.
They do seem to very much demonstrate the power of WAL mode though.
It's weird that the number of selects in WAL combined mode beats select only in that mode too. I got the same result a few times, and I don't understand why.

  
## benchmark.py
import sqlite3
import random
import threading
import time
import sys

DURATION = 10

def setup_database(db_path, wal_mode=False):
    conn = sqlite3.connect(db_path)
    if wal_mode:
        conn.execute("PRAGMA journal_mode=WAL")
    cursor = conn.cursor()

    cursor.execute('DROP TABLE IF EXISTS counters')
    cursor.execute('DROP TABLE IF EXISTS large_data')

    cursor.execute('''
    CREATE TABLE counters (
        id INTEGER PRIMARY KEY,
        value INTEGER DEFAULT 0
    )
    ''')
    for i in range(1000):
        cursor.execute('INSERT INTO counters (id) VALUES (?)', (i+1,))

    cursor.execute('''
    CREATE TABLE large_data (
        id INTEGER PRIMARY KEY,
        data TEXT
    )
    ''')
    for i in range(10000):
        data = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(100))
        cursor.execute('INSERT INTO large_data (data) VALUES (?)', (data,))

    conn.commit()
    conn.close()

def benchmark(db_path, do_update, do_select):
    ops_count = {"update": 0, "select": 0}

    def worker(query, key):
        conn_local = sqlite3.connect(db_path)
        end_time = time.time() + DURATION
        while time.time() < end_time:
            with conn_local:
                conn_local.execute(query, (random.randint(1, 1000),))
            ops_count[key] += 1
        conn_local.close()

    if do_update:
        t1 = threading.Thread(target=worker, args=('UPDATE counters SET value = value + 1 WHERE id = ?', "update"))
        t1.start()

    if do_select:
        t2 = threading.Thread(target=worker, args=('SELECT * FROM large_data WHERE id = ?', "select"))
        t2.start()

    if do_update:
        t1.join()
    if do_select:
        t2.join()

    return ops_count["update"]/DURATION, ops_count["select"]/DURATION

def print_ascii_table(data):
    widths = [max(map(len, str(col))) for col in zip(*data)]
    total_width = sum(widths) + 3 * len(widths) - 1
    format_str = ' | '.join('%%-%ds' % width for width in widths)
    print('-' * total_width)
    for row in data:
        print(format_str % tuple(row))
        print('-' * total_width)

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: sqlite_benchmark.py <database_basename>")
        sys.exit(1)

    base_name = sys.argv[1]
    results = [("Mode", "Scenario", "Update QPS", "Select QPS")]

    for mode, wal in [("Normal", False), ("WAL", True)]:
        db_path = f"{base_name}-{mode.lower()}.db"
        setup_database(db_path, wal_mode=wal)
        results.append((mode, "Combined", *benchmark(db_path, True, True)))
        results.append((mode, "Update Only", *benchmark(db_path, True, False)))
        results.append((mode, "Select Only", *benchmark(db_path, False, True)))

    print_ascii_table(results)
	import sqlite3
	import random
	import threading
	import time
	import sys

	DURATION = 10

	def setup_database(db_path, wal_mode=False):
	conn = sqlite3.connect(db_path)
	if wal_mode:
	conn.execute("PRAGMA journal_mode=WAL")
	cursor = conn.cursor()

	cursor.execute('DROP TABLE IF EXISTS counters')
	cursor.execute('DROP TABLE IF EXISTS large_data')

	cursor.execute('''
	CREATE TABLE counters (
	id INTEGER PRIMARY KEY,
	value INTEGER DEFAULT 0
	)
	''')
	for i in range(1000):
	cursor.execute('INSERT INTO counters (id) VALUES (?)', (i+1,))

	cursor.execute('''
	CREATE TABLE large_data (
	id INTEGER PRIMARY KEY,
	data TEXT
	)
	''')
	for i in range(10000):
	data = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in range(100))
	cursor.execute('INSERT INTO large_data (data) VALUES (?)', (data,))

	conn.commit()
	conn.close()

	def benchmark(db_path, do_update, do_select):
	ops_count = {"update": 0, "select": 0}

	def worker(query, key):
	conn_local = sqlite3.connect(db_path)
	end_time = time.time() + DURATION
	while time.time() < end_time:
	with conn_local:
	conn_local.execute(query, (random.randint(1, 1000),))
	ops_count[key] += 1
	conn_local.close()

	if do_update:
	t1 = threading.Thread(target=worker, args=('UPDATE counters SET value = value + 1 WHERE id = ?', "update"))
	t1.start()

	if do_select:
	t2 = threading.Thread(target=worker, args=('SELECT * FROM large_data WHERE id = ?', "select"))
	t2.start()

	if do_update:
	t1.join()
	if do_select:
	t2.join()

	return ops_count["update"]/DURATION, ops_count["select"]/DURATION

	def print_ascii_table(data):
	widths = [max(map(len, str(col))) for col in zip(*data)]
	total_width = sum(widths) + 3 * len(widths) - 1
	format_str = ' \| '.join('%%-%ds' % width for width in widths)
	print('-' * total_width)
	for row in data:
	print(format_str % tuple(row))
	print('-' * total_width)

	if __name__ == "__main__":
	if len(sys.argv) != 2:
	print("Usage: sqlite_benchmark.py <database_basename>")
	sys.exit(1)

	base_name = sys.argv[1]
	results = [("Mode", "Scenario", "Update QPS", "Select QPS")]

	for mode, wal in [("Normal", False), ("WAL", True)]:
	db_path = f"{base_name}-{mode.lower()}.db"
	setup_database(db_path, wal_mode=wal)
	results.append((mode, "Combined", *benchmark(db_path, True, True)))
	results.append((mode, "Update Only", *benchmark(db_path, True, False)))
	results.append((mode, "Select Only", *benchmark(db_path, False, True)))

	print_ascii_table(results)