Stephan Garland stephanGarland

## wake_backup.py
#!/usr/bin/env python3

import datetime
import os
import socket
import subprocess
import sys
import time

MAC_ADDR = "0025904F3A00"

## zfs_data_size_with_varying_recordsizes.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                stephanGarland
                / zfs_data_size_with_varying_recordsizes.md
            
            
              Last active
              June 11, 2023 16:33
            
              
                ZFS data size exploration
              
          
    I ran some experiments with varying recordsizes, filesizes, and compression. The files were .csv, representative of a simple schema:
full_name,external_id,last_modified
'Past, Gabrielle',40605,'2006-07-09 23:17:20' 
'Vachil, Corry',44277,'1996-09-05 05:12:44'

The files were all generated on an ext4 filesystem. There were three sets of five files, with 75, 100,000, and 1,000,000 rows per file, respectively, resulting in the following sizes:
❯ find . -name '*small*.csv' -exec du -bc {} + | \

awk 'END {printf "%s %.2f %s\n", "Average file size:", ($1 / (NR-1) / 1024), "KiB"}'

  
## mkjup.sh
# Requirements:
# jupyter-notebook: https://jupyter.org/install
# Python3: https://www.python.org/downloads
# virtualenv: https://pypi.org/project/virtualenv
# virtualenvwrapper: https://virtualenvwrapper.readthedocs.io/en/latest/install.html
# zsh
# Tested with Jupyter Notebook 6.5.3, Python 3.11.4, and zsh 5.8

mkjup() {
  # This is a base64-encoded skeleton of an .ipynb file, with two variables which are filled via envsubst later

## bench_mysql_innodb_myisam.sh
#!/usr/bin/env bash

function create_tables() {
    tbl_1=$(cat <<EOF
    CREATE TABLE IF NOT EXISTS test_innodb (
        user_id CHAR(32) NOT NULL PRIMARY KEY,
        user_email VARCHAR(254) NOT NULL,
        created_at DATETIME NOT NULL,
        tags JSON NOT NULL DEFAULT ('{}'),
        shared_with JSON NOT NULL DEFAULT ('{}'),

## bench_mysql_innodb_myisam.py
import csv
import json
import os
import pymysql
import statistics
import time


def prepare_values(row):
    new_row = []

## indexing_bools_low_cardinality.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                stephanGarland
                / indexing_bools_low_cardinality.md
            
            
              Created
              October 5, 2023 11:40
            
              
                Demonstrating how indexing booleans is often not helpful
              
          
    System Specifications


OS: Debian Bullseye 5.10.0-23-amd64
Virtualized: Yes (Proxmox)
CPU: E5-2650 v2 @ 2.60GHz
Allocated Core Count: 16
Allocated RAM: 64 GiB PC3-12800R
Disk: Samsung PM983 1.92 TiB via Ceph
Filesystem: XFS
Mount Options: defaults,noatime
Postgres Version: 15.3


## mysql_binary_truncation.sql
mysql> CREATE TABLE foo(id TINYINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, bar BINARY(16), baz CHAR(36));
Query OK, 0 rows affected (0.03 sec)

mysql> INSERT INTO foo (bar, baz) VALUES (UUID_TO_BIN('dc23a9b9-a129-11ee-95fb-0242ac110000'), 'dc23a9b9-a129-11ee-95fb-0242ac110000');
Query OK, 1 row affected (0.02 sec)

mysql> INSERT INTO foo (bar, baz) VALUES (UUID_TO_BIN('dc23a9b9-a129-11ee-95fb-0242ac110000'), 'dc23a9b9a12911ee95fb0242ac110000');
Query OK, 1 row affected (0.01 sec)

mysql> SHOW BINARY LOGS;

## tree_insert.sql
-- Seed data for the music-catalog tables.
-- Unqualified table names below are resolved against the 'example' schema.
SET search_path TO 'example';

-- Populate the genre, category, and format tables (name column only).
INSERT INTO genre (name) VALUES ('Rock'), ('Classic Rock'), ('Metal'), ('Progressive Metal'), ('Arena Rock'), ('Alternative Metal'), ('Grunge');
INSERT INTO category (name) VALUES ('Book'), ('Music');
INSERT INTO format (name) VALUES ('Compact Disc'), ('Vinyl');

-- Bands, each with the year it was formed.
INSERT INTO band (year_formed, name) VALUES (1985, 'Majesty'), (1988, 'Dream Theater'), (1990, 'Tool'), (1970, 'Queen'), (1987, 'Alice in Chains');

-- Artists with first and last name only.
-- NOTE(review): 'Ruddess' is possibly a misspelling of 'Rudess' -- confirm before relying on this value in lookups.
INSERT INTO artist (first_name, last_name) VALUES ('John', 'Petrucci'), ('John', 'Myung'), ('James', 'LaBrie'), ('Jordan', 'Ruddess'), ('Mike', 'Portnoy'), ('Mike', 'Mangini');
-- Artist row that also supplies the prefix and suffix columns.
INSERT INTO artist (first_name, last_name, prefix, suffix) VALUES ('Brian', 'May', 'Sir', 'CBE');

## postgres_clustering.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                stephanGarland
                / postgres_clustering.md
            
            
              Created
              February 9, 2024 23:37
            
              
                Demonstrating Postgres' clustering operation
              
          
    This table has 1,000,000 rows, each consisting of a UUIDv4 PK, a random int in the range (1, 1000000), and ~1 KiB of Lorem Ipsum text.
postgres=# \d+ uuid_pk
                                          Table "public.uuid_pk"
 Column  |  Type   | Collation | Nullable | Default | Storage  | Compression | Stats target | Description
---------+---------+-----------+----------+---------+----------+-------------+--------------+-------------
 id      | uuid    |           | not null |         | plain    |             |              |
 user_id | integer |           | not null |         | plain    |             |              |
 lorem   | text    |           | not null |         | extended |             |              |


## comparing_distinct_limit.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                stephanGarland
                / comparing_distinct_limit.md
            
            
              Created
              February 22, 2024 18:35
            
              
                Demonstrating DISTINCT with LIMIT on MySQL and Postgres
              
          
    Introduction

MySQL's documentation states:

When combining LIMIT row_count with DISTINCT, MySQL stops as soon as it finds row_count unique rows.

The question was asked: how is this deterministic / accurate? My hypothesis is that it's due to MySQL's use of a clustered
index, as opposed to Postgres' heap storage. I think that, given a monotonic PK such as an AUTO_INCREMENT (or perhaps
any index), MySQL is able to use that ordering to guarantee determinism.
	#!/usr/bin/env python3

	import datetime
	import os
	import socket
	import subprocess
	import sys
	import time

	MAC_ADDR = "0025904F3A00"
	# Requirements:
	# jupyter-notebook: https://jupyter.org/install
	# Python3: https://www.python.org/downloads
	# virtualenv: https://pypi.org/project/virtualenv
	# virtualenvwrapper: https://virtualenvwrapper.readthedocs.io/en/latest/install.html
	# zsh
	# Tested with Jupyter Notebook 6.5.3, Python 3.11.4, and zsh 5.8

	mkjup() {
	# This is a base64-encoded skeleton of an .ipynb file, with two variables which are filled via envsubst later
	#!/usr/bin/env bash

	function create_tables() {
	tbl_1=$(cat <<EOF
	CREATE TABLE IF NOT EXISTS test_innodb (
	user_id CHAR(32) NOT NULL PRIMARY KEY,
	user_email VARCHAR(254) NOT NULL,
	created_at DATETIME NOT NULL,
	tags JSON NOT NULL DEFAULT ('{}'),
	shared_with JSON NOT NULL DEFAULT ('{}'),
	import csv
	import json
	import os
	import pymysql
	import statistics
	import time


	def prepare_values(row):
	new_row = []
	mysql> CREATE TABLE foo(id TINYINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, bar BINARY(16), baz CHAR(36));
	Query OK, 0 rows affected (0.03 sec)

	mysql> INSERT INTO foo (bar, baz) VALUES (UUID_TO_BIN('dc23a9b9-a129-11ee-95fb-0242ac110000'), 'dc23a9b9-a129-11ee-95fb-0242ac110000');
	Query OK, 1 row affected (0.02 sec)

	mysql> INSERT INTO foo (bar, baz) VALUES (UUID_TO_BIN('dc23a9b9-a129-11ee-95fb-0242ac110000'), 'dc23a9b9a12911ee95fb0242ac110000');
	Query OK, 1 row affected (0.01 sec)

	mysql> SHOW BINARY LOGS;
	SET search_path TO 'example';

	INSERT INTO genre (name) VALUES ('Rock'), ('Classic Rock'), ('Metal'), ('Progressive Metal'), ('Arena Rock'), ('Alternative Metal'), ('Grunge');
	INSERT INTO category (name) VALUES ('Book'), ('Music');
	INSERT INTO format (name) VALUES ('Compact Disc'), ('Vinyl');

	INSERT INTO band (year_formed, name) VALUES (1985, 'Majesty'), (1988, 'Dream Theater'), (1990, 'Tool'), (1970, 'Queen'), (1987, 'Alice in Chains');

	INSERT INTO artist (first_name, last_name) VALUES ('John', 'Petrucci'), ('John', 'Myung'), ('James', 'LaBrie'), ('Jordan', 'Ruddess'), ('Mike', 'Portnoy'), ('Mike', 'Mangini');
	INSERT INTO artist (first_name, last_name, prefix, suffix) VALUES ('Brian', 'May', 'Sir', 'CBE');