Skip to content

Instantly share code, notes, and snippets.

@eatonphil
Created November 12, 2023 19:58
Show Gist options
  • Save eatonphil/c2948f65d216a9ad3667801578154eee to your computer and use it in GitHub Desktop.
Save eatonphil/c2948f65d216a9ad3667801578154eee to your computer and use it in GitHub Desktop.
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define NUM_RECORDS 1000000 // 1M
#define NUM_QUERIES 1000 // number of queries to run in a row
#define LOCATION_ID 42
struct User {
int id;
char name[255];
int birth_location_id;
int time_of_birth;
int company;
int state;
int children;
bool married;
};
struct ColDB {
int *id;
char *name; // store the names contiguously to mimic row db
int *birth_location_id;
int *time_of_birth;
int *company;
int *state;
int *children;
bool *married;
} col_db;
// row oriented DB is just an array of users.
struct User *row_db;
struct ColDB col_db;
static void setup(void) {
row_db = malloc(sizeof(struct User) * NUM_RECORDS);
assert(row_db != NULL);
col_db.id = malloc(sizeof(int) * NUM_RECORDS);
assert(col_db.id != NULL);
col_db.name = malloc(255 * NUM_RECORDS);
assert(col_db.name != NULL);
col_db.birth_location_id = malloc(sizeof(int) * NUM_RECORDS);
assert(col_db.birth_location_id != NULL);
col_db.time_of_birth = malloc(sizeof(int) * NUM_RECORDS);
assert(col_db.time_of_birth != NULL);
col_db.company = malloc(sizeof(int) * NUM_RECORDS);
assert(col_db.company != NULL);
col_db.state = malloc(sizeof(int) * NUM_RECORDS);
assert(col_db.state != NULL);
col_db.children = malloc(sizeof(int) * NUM_RECORDS);
assert(col_db.children != NULL);
col_db.married = malloc(sizeof(int) * NUM_RECORDS);
assert(col_db.married != NULL);
srand((unsigned int)time(NULL));
for (size_t i = 0; i < NUM_RECORDS; i++) {
int id = abs(rand());
int l = abs(rand()) % 100; // make some duplicate birth locations...
int t = abs(rand()) % 86400; // seconds in a day
row_db[i].id = id;
row_db[i].name[32] = '\0'; // just so we can print the names
row_db[i].birth_location_id = l;
row_db[i].time_of_birth = t;
row_db[i].company = l;
row_db[i].state = l;
row_db[i].children = l;
row_db[i].married = t % 10;
col_db.id[i] = id;
col_db.name[i * 255 + 32] = '\0';
col_db.birth_location_id[i] = l;
col_db.time_of_birth[i] = t;
col_db.company[i] = l;
col_db.state[i] = l;
col_db.children[i] = l;
col_db.married[i] = t % 10;
}
}
// SELECT COUNT(1) FROM users WHERE location = 42
static size_t rcount_users_by_location() {
size_t count = 0;
for (size_t i = 0; i < NUM_RECORDS; i++) {
struct User u = row_db[i];
if (u.birth_location_id == LOCATION_ID) {
count++;
}
}
return count;
}
// SELECT COUNT(1) FROM users WHERE location = 42
static size_t ccount_users_by_location() {
size_t count = 0;
for (size_t i = 0; i < NUM_RECORDS; i++) {
if (col_db.birth_location_id[i] == LOCATION_ID) {
count++;
}
}
return count;
}
struct Users {
size_t size;
size_t offset;
struct User* users;
};
static struct Users* users_init() {
struct Users* users = (struct Users*)malloc(sizeof(struct User));
assert(users != NULL);
users->size = NUM_RECORDS;
users->offset = 0;
users->users = (struct User*)malloc(sizeof(struct User) * users->size);
assert(users->users != NULL);
return users;
}
static void users_reset(struct Users* users) {
users->offset = 0;
}
static void users_append(struct Users* users, struct User user) {
assert(users->offset < users->size);
users->users[users->offset] = user;
users->offset++;
}
static void users_free(struct Users* users) {
free(users->users);
free(users);
}
// SELECT * FROM users WHERE location > 1
static struct Users* rusers_by_location(struct Users* users) {
for (size_t i = 0; i < NUM_RECORDS; i++) {
struct User u = row_db[i];
if (u.birth_location_id > 1) {
users_append(users, u);
}
}
return users;
}
// SELECT * FROM users WHERE location > 1
static struct Users* cusers_by_location(struct Users *users) {
for (size_t i = 0; i < NUM_RECORDS; i++) {
if (col_db.birth_location_id[i] > 1) {
struct User u = {
.id = col_db.id[i],
.birth_location_id = col_db.birth_location_id[i],
.time_of_birth = col_db.time_of_birth[i],
.company = col_db.company[i],
.state = col_db.state[i],
.children = col_db.children[i],
.married = col_db.married[i]
};
memcpy(u.name, col_db.name + (i * 255), 255);
users_append(users, u);
}
}
return users;
}
// SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T
static struct Users* rusers_restricted(struct Users* users) {
for (size_t i = 0; i < NUM_RECORDS; i++) {
struct User u = row_db[i];
if (u.birth_location_id == 42 || u.company == 1 || u.state == 12 || u.married == true || u.children > 2) {
users_append(users, u);
}
}
return users;
}
// SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T
static struct Users* cusers_restricted(struct Users *users) {
for (size_t i = 0; i < NUM_RECORDS; i++) {
if (col_db.birth_location_id[i] == 42 || col_db.company[i] == 1 || col_db.state[i] == 12 || col_db.married[i] == true || col_db.children[i] > 2) {
struct User u = {
.id = col_db.id[i],
.birth_location_id = col_db.birth_location_id[i],
.time_of_birth = col_db.time_of_birth[i],
.company = col_db.company[i],
.state = col_db.state[i],
.children = col_db.children[i],
.married = col_db.married[i]
};
memcpy(u.name, col_db.name + (i * 255), 255);
users_append(users, u);
}
}
return users;
}
// SELECT MAX(time_of_birth) FROM users;
static int cmax_time_of_birth(void) {
int result = 0;
for (size_t i = 0; i < NUM_RECORDS; i++) {
int t = col_db.time_of_birth[i];
if (t > result)
result = t;
}
return result;
}
// SELECT MAX(time_of_birth) FROM users;
static int rmax_time_of_birth(void) {
int result = 0;
for (size_t i = 0; i < NUM_RECORDS; i++) {
int t = row_db[i].time_of_birth;
if (t > result)
result = t;
}
return result;
}
int main(int argc, char **argv) {
setup();
int c, r = 0;
size_t result_count = 0;
struct User *results = malloc(sizeof(struct User) * NUM_RECORDS);
assert(results != NULL);
start = time(NULL);
for (size_t i = 0; i < NUM_QUERIES; i++) {
r = rmax_time_of_birth();
}
printf("[ROW] elapsed: %lds\trmax_time_of_birth: %d\t[SELECT MAX(time_of_birth) FROM users;]\n", time(NULL) - start, r);
time_t start = time(NULL);
for (size_t i = 0; i < NUM_QUERIES; i++) {
c = cmax_time_of_birth();
}
printf("[COL] elapsed: %lds\tcmax_time_of_birth: %d\t[SELECT MAX(time_of_birth) FROM users;]\n", time(NULL) - start, c);
start = time(NULL);
for (size_t i = 0; i < NUM_QUERIES; i++) {
result_count = rcount_users_by_location();
}
printf("[ROW] elapsed: %lds\trecords for rcount_users_by_location count: %ld\t[SELECT COUNT(1) FROM users WHERE location = 42]\n",
time(NULL) - start, result_count);
start = time(NULL);
for (size_t i = 0; i < NUM_QUERIES; i++) {
result_count = ccount_users_by_location();
}
printf("[COL] elapsed: %lds\trecords for ccount_users_by_location count: %ld\t[SELECT COUNT(1) FROM users WHERE location = 42]\n",
time(NULL) - start, result_count);
struct Users* users = users_init();
start = time(NULL);
for (size_t i = 0; i < NUM_QUERIES; i++) {
users = rusers_by_location(users);
result_count = users->offset;
users_reset(users);
}
printf("[ROW] elapsed: %lds\trecords for r_users_by_location: %ld [SELECT * FROM users WHERE location > 1]\n",
time(NULL) - start, result_count);
start = time(NULL);
for (size_t i = 0; i < NUM_QUERIES; i++) {
users = cusers_by_location(users);
result_count = users->offset;
users_reset(users);
}
printf("[COL] elapsed: %lds\trecords for c_users_by_location: %ld [SELECT * FROM users WHERE location > 1]\n",
time(NULL) - start, result_count);
start = time(NULL);
for (size_t i = 0; i < NUM_QUERIES; i++) {
users = rusers_restricted(users);
result_count = users->offset;
users_reset(users);
}
printf("[ROW] elapsed: %lds\trecords for r_users_restricted: %ld\t[SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]\n",
time(NULL) - start, result_count);
start = time(NULL);
for (size_t i = 0; i < NUM_QUERIES; i++) {
users = cusers_restricted(users);
result_count = users->offset;
users_reset(users);
}
printf("[COL] elapsed: %lds\trecords for c_users_restricted: %ld\t[SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]\n",
time(NULL) - start, result_count);
users_free(users);
return EXIT_SUCCESS;
}
default:
gcc -O0 -g3 -Wall -Wextra -Wconversion -Wdouble-promotion \
-Wno-unused-parameter -Wno-unused-function -Wno-sign-conversion \
main.c
format:
clang-format -i main.c
$ make
$ ./a.out
[ROW] elapsed: 9s rmax_time_of_birth: 86399 [SELECT MAX(time_of_birth) FROM users;]
[COL] elapsed: 2s cmax_time_of_birth: 86399 [SELECT MAX(time_of_birth) FROM users;]
[ROW] elapsed: 6s records for rcount_users_by_location count: 9984 [SELECT COUNT(1) FROM users WHERE location = 42]
[COL] elapsed: 1s records for ccount_users_by_location count: 9984 [SELECT COUNT(1) FROM users WHERE location = 42]
[ROW] elapsed: 19s records for r_users_by_location: 980011 [SELECT * FROM users WHERE location > 1]
[COL] elapsed: 24s records for c_users_by_location: 980011 [SELECT * FROM users WHERE location > 1]
[ROW] elapsed: 21s records for r_users_restricted: 998008 [SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]
[COL] elapsed: 27s records for c_users_restricted: 998008 [SELECT * FROM users WHERE location = 42 OR company = 1 OR state = 12 OR married = T OR children > 2]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment