Skip to content

Instantly share code, notes, and snippets.

@llimllib
Last active October 1, 2024 17:33
Show Gist options
  • Save llimllib/4cb06c5fe7439aa7f3cb67a818fa230d to your computer and use it in GitHub Desktop.
Save llimllib/4cb06c5fe7439aa7f3cb67a818fa230d to your computer and use it in GitHub Desktop.
comparing sqlite in C to sqlite libraries in go

The goal here was to create a C translation of the "simple" benchmark from here which inserts a million users and queries them back out.

The results on my system are, all times best of 2 runs and in milliseconds:

lib insert query
C sqlite3 737 84
mattn/sqlite3 1380 1036
crawshaw.io/sqlite 1097 542
modernc.org/sqlite 4148 966
eatonphil/gosqlite 1017 651
ncruces/go-sqlite3 2178 611
python 3.12 1992 1426
zombiezen/go-sqlite 1403 211
$ go version
go version go1.23.1 darwin/arm64
import sqlite3
import time
from typing import NamedTuple
NUSERS = 1000000
class User(NamedTuple):
id: int
created: float
email: str
active: int
def init_schema(db):
schemas = [
"PRAGMA journal_mode=DELETE",
"PRAGMA synchronous=FULL",
"PRAGMA foreign_keys=1",
"PRAGMA busy_timeout=5000",
"CREATE TABLE users ("
" id INTEGER PRIMARY KEY NOT NULL,"
" created INTEGER NOT NULL,"
" email TEXT NOT NULL,"
" active INTEGER NOT NULL)",
"CREATE INDEX users_created ON users(created)",
"CREATE TABLE articles ("
" id INTEGER PRIMARY KEY NOT NULL,"
" created INTEGER NOT NULL,"
" userId INTEGER NOT NULL REFERENCES users(id),"
" text TEXT NOT NULL)",
"CREATE INDEX articles_created ON articles(created)",
"CREATE INDEX articles_userId ON articles(userId)",
"CREATE TABLE comments ("
" id INTEGER PRIMARY KEY NOT NULL,"
" created INTEGER NOT NULL,"
" articleId INTEGER NOT NULL REFERENCES articles(id),"
" text TEXT NOT NULL)",
"CREATE INDEX comments_created ON comments(created)",
"CREATE INDEX comments_articleId ON comments(articleId)",
]
for schema in schemas:
try:
db.execute(schema)
except sqlite3.Error as e:
print(f"Error executing {schema}: {e}")
exit(1)
def bench_simple(dbfile, verbose=False):
db = sqlite3.connect(dbfile)
print("initting schema")
init_schema(db)
base = int((time.time() - 86400 * 30) * 1000)
users = [
User(
id=i + 1,
created=base + i * 60_000,
email=f"user{i+1:08d}@example.com",
active=1,
)
for i in range(NUSERS)
]
start = time.perf_counter()
with db:
db.executemany(
"INSERT INTO users(id,created,email,active) VALUES(?,?,?,?)",
[(user.id, int(user.created), user.email, user.active) for user in users],
)
insert_millis = (time.perf_counter() - start) * 1000
if verbose:
print(f" insert took {insert_millis:.0f} ms")
start = time.perf_counter()
result_users = [
User(*row)
for row in db.execute("SELECT id,created,email,active FROM users ORDER BY id")
]
query_millis = (time.perf_counter() - start) * 1000
if verbose:
print(f" query took {query_millis:.0f} ms")
count = len(result_users)
if count != NUSERS:
print(f"Expected {NUSERS} users, got {count}")
# Validate query result
for result_user, expected_user in zip(result_users, users):
if result_user.id != expected_user.id:
print(f"Expected id {expected_user.id}, got {result_user.id}")
if result_user.created != expected_user.created:
print(
f"Wrong created time for user {result_user.id}: {result_user.created} !< {base} !< {base + NUSERS * 60}"
)
if not result_user.email.startswith("user"):
print(f"Wrong email format for user {result_user.id}: {result_user.email}")
if result_user.active != 1:
print(
f"Expected active = 1 for user {result_user.id}, got {result_user.active}"
)
print(f"1_simple - insert - sqlite3 - {insert_millis:.0f}")
print(f"1_simple - query - sqlite3 - {query_millis:.0f}")
db.close()
if __name__ == "__main__":
import sys
if len(sys.argv) != 2:
print(f"Usage: {sys.argv[0]} <database_file>")
sys.exit(1)
dbfile = sys.argv[1]
print(f"starting up {dbfile}")
bench_simple(dbfile, verbose=True)
run-bench: clean bench
./bench /tmp/test.sqlite
bench:
clang -o bench -lsqlite3 -g sqlite.c
clean:
rm -f bench /tmp/test.sqlite
#include <sqlite3.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define NUSERS 1000000
typedef struct {
int id;
time_t created;
char email[32];
int active;
} User;
void initSchema(sqlite3 *db) {
char *schema[] = {"PRAGMA journal_mode=DELETE",
"PRAGMA synchronous=FULL",
"PRAGMA foreign_keys=1",
"PRAGMA busy_timeout=5000",
"CREATE TABLE users ("
" id INTEGER PRIMARY KEY NOT NULL,"
" created INTEGER NOT NULL,"
" email TEXT NOT NULL,"
" active INTEGER NOT NULL)",
"CREATE INDEX users_created ON users(created)",
"CREATE TABLE articles ("
" id INTEGER PRIMARY KEY NOT NULL,"
" created INTEGER NOT NULL,"
" userId INTEGER NOT NULL REFERENCES users(id),"
" text TEXT NOT NULL)",
"CREATE INDEX articles_created ON articles(created)",
"CREATE INDEX articles_userId ON articles(userId)",
"CREATE TABLE comments ("
" id INTEGER PRIMARY KEY NOT NULL,"
" created INTEGER NOT NULL,"
" articleId INTEGER NOT NULL REFERENCES articles(id),"
" text TEXT NOT NULL)",
"CREATE INDEX comments_created ON comments(created)",
"CREATE INDEX comments_articleId ON comments(articleId)",
NULL};
for (int i = 0; schema[i] != NULL; i++) {
char *err = NULL;
int rc = sqlite3_exec(db, schema[i], NULL, NULL, &err);
if (rc != SQLITE_OK) {
fprintf(stderr, "Error executing %s: %s\n", schema[i], err);
sqlite3_free(err);
exit(1);
}
}
}
void benchSimple(const char *dbfile, int verbose) {
if (verbose) {
fprintf(stderr, "opening\n");
}
sqlite3 *db;
int rc = sqlite3_open(dbfile, &db);
if (rc != SQLITE_OK) {
fprintf(stderr, "Cannot open database: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
exit(1);
}
fprintf(stderr, "initting schema\n");
initSchema(db);
User *users = malloc(NUSERS * sizeof(User));
if (users == NULL) {
fprintf(stderr, "Failed to allocate memory for users\n");
sqlite3_close(db);
exit(1);
}
time_t base = time(NULL) - 86400 * 30; // 30 days ago
for (int i = 0; i < NUSERS; i++) {
users[i].id = i + 1;
users[i].created = base + i * 60; // 1 minute apart
snprintf(users[i].email, sizeof(users[i].email), "user%08d@example.com",
i + 1);
users[i].active = 1;
}
clock_t t0 = clock();
sqlite3_stmt *stmt;
rc = sqlite3_prepare_v2(
db, "INSERT INTO users(id,created,email,active) VALUES(?,?,?,?)", -1,
&stmt, NULL);
if (rc != SQLITE_OK) {
fprintf(stderr, "Failed to prepare statement: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
exit(1);
}
sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, NULL);
for (int i = 0; i < NUSERS; i++) {
sqlite3_bind_int(stmt, 1, users[i].id);
sqlite3_bind_int64(stmt, 2, users[i].created * 1000);
sqlite3_bind_text(stmt, 3, users[i].email, -1, SQLITE_TRANSIENT);
sqlite3_bind_int(stmt, 4, users[i].active);
rc = sqlite3_step(stmt);
if (rc != SQLITE_DONE) {
fprintf(stderr, "Insert failed: %s\n", sqlite3_errmsg(db));
sqlite3_finalize(stmt);
sqlite3_close(db);
exit(1);
}
sqlite3_reset(stmt);
}
sqlite3_exec(db, "COMMIT", NULL, NULL, NULL);
sqlite3_finalize(stmt);
double insertMillis = (double)(clock() - t0) * 1000 / CLOCKS_PER_SEC;
if (verbose) {
printf(" insert took %.0f ms\n", insertMillis);
}
User *resultUsers = malloc(NUSERS * sizeof(User));
t0 = clock();
sqlite3_stmt *selectStmt;
rc = sqlite3_prepare_v2(
db, "SELECT id,created,email,active FROM users ORDER BY id", -1,
&selectStmt, NULL);
if (rc != SQLITE_OK) {
fprintf(stderr, "Failed to prepare statement: %s\n", sqlite3_errmsg(db));
sqlite3_close(db);
exit(1);
}
int count = 0;
while ((rc = sqlite3_step(selectStmt)) == SQLITE_ROW) {
resultUsers[count].id = sqlite3_column_int(selectStmt, 0);
resultUsers[count].created = sqlite3_column_int64(selectStmt, 1);
const char *email = (const char *)sqlite3_column_text(selectStmt, 2);
strncpy(resultUsers[count].email, email, sizeof(resultUsers[count].email));
resultUsers[count].email[sizeof(resultUsers[count].email) - 1] =
'\0'; // Ensure null termination
resultUsers[count].active = sqlite3_column_int(selectStmt, 3);
count++;
}
if (rc != SQLITE_DONE) {
fprintf(stderr, "Select failed: %s\n", sqlite3_errmsg(db));
sqlite3_finalize(selectStmt);
sqlite3_close(db);
exit(1);
}
sqlite3_finalize(selectStmt);
double queryMillis = (double)(clock() - t0) * 1000 / CLOCKS_PER_SEC;
if (verbose) {
printf(" query took %.0f ms\n", queryMillis);
}
if (count != NUSERS) {
fprintf(stderr, "Expected %d users, got %d\n", NUSERS, count);
}
// Validate query result
for (int i = 0; i < count; i++) {
if (resultUsers[i].id != i + 1) {
fprintf(stderr, "Expected id %d, got %d\n", i + 1, resultUsers[i].id);
}
if (resultUsers[i].created < base * 1000 ||
resultUsers[i].created > (base + NUSERS * 60) * 1000) {
fprintf(stderr, "Wrong created time for user %d: %ld !< %ld !< %ld\n",
resultUsers[i].id, base * 1000, resultUsers[i].created,
base * 1000 + NUSERS * 60);
}
if (strncmp(resultUsers[i].email, "user", 4) != 0) {
fprintf(stderr, "Wrong email format for user %d: %s\n", resultUsers[i].id,
resultUsers[i].email);
}
if (resultUsers[i].active != 1) {
fprintf(stderr, "Expected active = 1 for user %d, got %d\n",
resultUsers[i].id, resultUsers[i].active);
}
}
printf("1_simple - insert - sqlite3 - %.0f\n", insertMillis);
printf("1_simple - query - sqlite3 - %.0f\n", queryMillis);
sqlite3_close(db);
}
int main(int argc, char *argv[]) {
if (argc != 2) {
fprintf(stderr, "Usage: %s <database_file>\n", argv[0]);
return 1;
}
fprintf(stderr, "starting up %s\n", argv[1]);
benchSimple(argv[1], 1);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment