Skip to content

Instantly share code, notes, and snippets.

@erichosick
Last active August 11, 2023 08:56
Show Gist options
  • Save erichosick/eeadc41e33ea2a91bd136b9433e73dc7 to your computer and use it in GitHub Desktop.
Save erichosick/eeadc41e33ea2a91bd136b9433e73dc7 to your computer and use it in GitHub Desktop.
-- EXTENSIONS ------------------------------------------------------------------
-- Supports uuid type and generating uuid
-- https://www.postgresql.org/docs/current/uuid-ossp.html
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Supports cryptographic functions
-- https://www.postgresql.org/docs/current/pgcrypto.html
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- SCHEMA ----------------------------------------------------------------------
CREATE SCHEMA IF NOT EXISTS benchmark;
COMMENT ON SCHEMA benchmark IS 'contains tables used to test performance of different schema designs.';
-- TABLES ----------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS benchmark.compare (
compare_id uuid PRIMARY KEY NOT NULL DEFAULT uuid_generate_v1mc(),
description_sql_left VARCHAR(128) NOT NULL,
description_sql_right VARCHAR(1024) NOT NULL,
sql_left text NOT NULL,
sql_right text NOT NULL,
series_row_count bigint NOT NULL,
loop_count bigint NOT NULL,
created_on timestamptz NOT NULL DEFAULT NOW()
);
COMMENT ON TABLE benchmark.compare IS 'A comparision of two sql statements.';
CREATE TABLE IF NOT EXISTS benchmark.compare_result (
compare_result_id uuid PRIMARY KEY NOT NULL DEFAULT uuid_generate_v1mc(),
compare_id uuid NOT NULL REFERENCES benchmark.compare(compare_id),
run_number int NOT NULL,
started_left_on timestamptz NOT NULL,
stopped_left_on timestamptz NOT NULL,
started_right_on timestamptz NOT NULL,
stopped_right_on timestamptz NOT NULL,
run_time_sec_left decimal(18,10) GENERATED ALWAYS AS
(EXTRACT(epoch FROM stopped_left_on - started_left_on)) STORED,
run_time_sec_right decimal(18,10) GENERATED ALWAYS AS
(EXTRACT(epoch FROM stopped_right_on - started_right_on)) STORED,
created_on timestamptz NOT NULL DEFAULT NOW()
);
COMMENT ON TABLE benchmark.compare_result IS 'The outcome of comparing two sql statements one or more times.';
CREATE OR REPLACE PROCEDURE benchmark.execute (
p_description_sql_left VARCHAR(128),
p_description_sql_right VARCHAR(1024),
p_series_row_count bigint,
p_sql_left text,
p_sql_right text,
p_loop_count int = 20,
p_after_each text = ''
) LANGUAGE plpgsql AS
$$
DECLARE
d_started_left_on timestamptz = clock_timestamp();
d_stopped_left_on timestamptz = clock_timestamp();
d_started_right_on timestamptz = clock_timestamp();
d_stopped_right_on timestamptz = clock_timestamp();
d_compare_id uuid = uuid_generate_v1mc();
BEGIN
INSERT INTO benchmark.compare (
compare_id,
description_sql_left,
description_sql_right,
sql_left,
sql_right,
series_row_count,
loop_count
) VALUES (
d_compare_id,
p_description_sql_left,
p_description_sql_right,
p_sql_left,
p_sql_right,
p_series_row_count,
p_loop_count
);
FOR run_number IN 1..p_loop_count LOOP
d_started_left_on = clock_timestamp();
EXECUTE(p_sql_left);
d_stopped_left_on = clock_timestamp();
COMMIT;
d_started_right_on = clock_timestamp();
EXECUTE(p_sql_right);
d_stopped_right_on = clock_timestamp();
COMMIT;
INSERT INTO benchmark.compare_result (
compare_id,
run_number,
started_left_on,
stopped_left_on,
started_right_on,
stopped_right_on
) VALUES (
d_compare_id,
run_number,
d_started_left_on,
d_stopped_left_on,
d_started_right_on,
d_stopped_right_on
);
EXECUTE(p_after_each);
END LOOP;
END;
$$;
CREATE OR REPLACE VIEW benchmark.compare_result_threashold AS
SELECT
cres.compare_id,
PERCENTILE_DISC(0.1) WITHIN
GROUP (ORDER BY run_time_sec_left) AS lower_threashold_left,
PERCENTILE_DISC(0.9) WITHIN
GROUP (ORDER BY run_time_sec_left) AS upper_threashold_left,
PERCENTILE_DISC(0.1) WITHIN
GROUP (ORDER BY run_time_sec_right) AS lower_threashold_right,
PERCENTILE_DISC(0.9) WITHIN
GROUP (ORDER BY run_time_sec_right) AS upper_threashold_right
FROM benchmark.compare_result AS cres
GROUP BY cres.compare_id;
CREATE OR REPLACE VIEW benchmark.compare_threashold AS
SELECT
comp.description_sql_left,
comp.description_sql_right,
PERCENTILE_DISC(0.1) WITHIN
GROUP (ORDER BY run_time_sec_left) AS lower_threashold_left,
PERCENTILE_DISC(0.9) WITHIN
GROUP (ORDER BY run_time_sec_left) AS upper_threashold_left,
PERCENTILE_DISC(0.1) WITHIN
GROUP (ORDER BY run_time_sec_right) AS lower_threashold_right,
PERCENTILE_DISC(0.9) WITHIN
GROUP (ORDER BY run_time_sec_right) AS upper_threashold_right
FROM benchmark.compare AS comp
INNER JOIN benchmark.compare_result AS cres
ON cres.compare_id = comp.compare_id
GROUP BY comp.description_sql_left, comp.description_sql_right;
CREATE OR REPLACE VIEW benchmark.compare_result_outcome AS
WITH final AS (
WITH info AS (
SELECT
comp.compare_id,
STDDEV(calc.run_time_ms_left)::decimal(18,4) AS standard_deviation_left,
AVG(calc.run_time_ms_left)::decimal(18,4) AS average_left,
MIN(calc.run_time_ms_left)::decimal(18,4) AS min_time_ms_left,
MAX(calc.run_time_ms_left)::decimal(18,4) AS max_time_ms_left,
STDDEV(calc.run_time_ms_right)::decimal(18,4) AS standard_deviation_right,
AVG(calc.run_time_ms_right)::decimal(18,4) AS average_right,
MIN(calc.run_time_ms_right)::decimal(18,4) AS min_time_ms_right,
MAX(calc.run_time_ms_right)::decimal(18,4) AS max_time_ms_right
FROM benchmark.compare AS comp
INNER JOIN benchmark.compare_result AS cres
ON cres.compare_id = comp.compare_id,
LATERAL (
SELECT (
cres.run_time_sec_left / comp.series_row_count
)*1e6 AS run_time_ms_left,
(
cres.run_time_sec_right / comp.series_row_count
)*1e6 AS run_time_ms_right
) AS calc
GROUP BY comp.compare_id
)
SELECT
comp.compare_id,
CASE WHEN calc.time_range_in_ms_left && calc.time_range_in_ms_right
THEN 'neither' ELSE
CASE WHEN info.average_left > info.average_right THEN
'right' ELSE 'left'
END
END AS faster_sql,
false as applied_threshold,
comp.description_sql_left,
calc.time_range_in_ms_left,
info.standard_deviation_left,
info.average_left,
info.min_time_ms_left,
info.max_time_ms_left,
comp.description_sql_right,
calc.time_range_in_ms_right,
info.standard_deviation_right,
info.average_right,
info.min_time_ms_right,
info.max_time_ms_right
FROM info
INNER JOIN benchmark.compare AS comp ON comp.compare_id = info.compare_id,
LATERAL (
SELECT
NUMRANGE(
average_left - standard_deviation_left,
average_left + standard_deviation_left
) AS time_range_in_ms_left,
NUMRANGE(
average_right - standard_deviation_right,
average_right + standard_deviation_right
) AS time_range_in_ms_right
) AS calc
)
SELECT
final.*,
CASE WHEN final.faster_sql IN ('right', 'neither') THEN
(final.average_left / NULLIF(final.average_right,0))::decimal(18,2)
ELSE
(final.average_right / NULLIF(final.average_left,0))::decimal(18,2)
END AS times_faster
FROM final;
COMMENT ON VIEW benchmark.compare_result_outcome IS 'Grouping by benchmark.compare_result.compare_id, determines which sql statement (left, right or neither) was fastest considering standard deviation.';
CREATE OR REPLACE VIEW benchmark.compare_outcome AS
WITH final AS (
WITH info AS (
SELECT
comp.description_sql_left,
comp.description_sql_right,
comp.sql_left,
comp.sql_right,
STDDEV(calc.run_time_ms_left)::decimal(18,4) AS standard_deviation_left,
AVG(calc.run_time_ms_left)::decimal(18,4) AS average_left,
MIN(calc.run_time_ms_left)::decimal(18,4) AS min_time_ms_left,
MAX(calc.run_time_ms_left)::decimal(18,4) AS max_time_ms_left,
STDDEV(calc.run_time_ms_right)::decimal(18,4) AS standard_deviation_right,
AVG(calc.run_time_ms_right)::decimal(18,4) AS average_right,
MIN(calc.run_time_ms_right)::decimal(18,4) AS min_time_ms_right,
MAX(calc.run_time_ms_right)::decimal(18,4) AS max_time_ms_right
FROM benchmark.compare AS comp
INNER JOIN benchmark.compare_result AS cres
ON cres.compare_id = comp.compare_id,
LATERAL (
SELECT (
cres.run_time_sec_left / comp.series_row_count
)*1e6 AS run_time_ms_left,
(
cres.run_time_sec_right / comp.series_row_count
)*1e6 AS run_time_ms_right
) AS calc
GROUP BY comp.description_sql_left, comp.description_sql_right,
comp.sql_left, comp.sql_right
)
SELECT
CASE WHEN calc.time_range_in_ms_left && calc.time_range_in_ms_right
THEN 'neither' ELSE
CASE WHEN info.average_left > info.average_right THEN
'right' ELSE 'left'
END
END AS faster_sql,
false as applied_threshold,
info.description_sql_left,
info.sql_left,
calc.time_range_in_ms_left,
info.standard_deviation_left,
info.average_left,
info.min_time_ms_left,
info.max_time_ms_left,
info.sql_right,
info.description_sql_right,
calc.time_range_in_ms_right,
info.standard_deviation_right,
info.average_right,
info.min_time_ms_right,
info.max_time_ms_right
FROM info,
LATERAL (
SELECT
NUMRANGE(
average_left - standard_deviation_left,
average_left + standard_deviation_left
) AS time_range_in_ms_left,
NUMRANGE(
average_right - standard_deviation_right,
average_right + standard_deviation_right
) AS time_range_in_ms_right
) AS calc
)
SELECT
final.*,
CASE WHEN final.faster_sql IN ('right', 'neither') THEN
(final.average_left / NULLIF(final.average_right,0))::decimal(18,2)
ELSE
(final.average_right / NULLIF(final.average_left,0))::decimal(18,2)
END AS times_faster
FROM final;
COMMENT ON VIEW benchmark.compare_outcome IS 'Grouping by benchmark.compare, determines which sql statement (left, right or neither) was fastest considering standard deviation.';
CREATE OR REPLACE VIEW benchmark.compare_result_outcome_threashold AS
WITH final AS (
WITH info AS (
SELECT
comp.compare_id,
STDDEV(calc.run_time_ms_left)::decimal(18,4) AS standard_deviation_left,
AVG(calc.run_time_ms_left)::decimal(18,4) AS average_left,
MIN(calc.run_time_ms_left)::decimal(18,4) AS min_time_ms_left,
MAX(calc.run_time_ms_left)::decimal(18,4) AS max_time_ms_left,
STDDEV(calc.run_time_ms_right)::decimal(18,4) AS standard_deviation_right,
AVG(calc.run_time_ms_right)::decimal(18,4) AS average_right,
MIN(calc.run_time_ms_right)::decimal(18,4) AS min_time_ms_right,
MAX(calc.run_time_ms_right)::decimal(18,4) AS max_time_ms_right
FROM benchmark.compare AS comp
INNER JOIN benchmark.compare_result AS cres
ON cres.compare_id = comp.compare_id
INNER JOIN benchmark.compare_result_threashold AS threashold
ON threashold.compare_id = comp.compare_id,
LATERAL (
SELECT (
cres.run_time_sec_left / comp.series_row_count
)*1e6 AS run_time_ms_left,
(
cres.run_time_sec_right / comp.series_row_count
)*1e6 AS run_time_ms_right
) AS calc
WHERE cres.run_time_sec_left
BETWEEN threashold.lower_threashold_left
AND threashold.upper_threashold_left
GROUP BY comp.compare_id
)
SELECT
comp.compare_id,
CASE WHEN calc.time_range_in_ms_left && calc.time_range_in_ms_right
THEN 'neither' ELSE
CASE WHEN info.average_left > info.average_right THEN
'right' ELSE 'left'
END
END AS faster_sql,
true as applied_threshold,
comp.description_sql_left,
calc.time_range_in_ms_left,
info.standard_deviation_left,
info.average_left,
info.min_time_ms_left,
info.max_time_ms_left,
comp.description_sql_right,
calc.time_range_in_ms_right,
info.standard_deviation_right,
info.average_right,
info.min_time_ms_right,
info.max_time_ms_right
FROM info
INNER JOIN benchmark.compare AS comp ON comp.compare_id = info.compare_id,
LATERAL (
SELECT
NUMRANGE(
average_left - standard_deviation_left,
average_left + standard_deviation_left
) AS time_range_in_ms_left,
NUMRANGE(
average_right - standard_deviation_right,
average_right + standard_deviation_right
) AS time_range_in_ms_right
) AS calc
)
SELECT
final.*,
CASE WHEN final.faster_sql IN ('right', 'neither') THEN
(final.average_left / NULLIF(final.average_right,0))::decimal(18,2)
ELSE
(final.average_right / NULLIF(final.average_left,0))::decimal(18,2)
END AS times_faster
FROM final;
COMMENT ON VIEW benchmark.compare_result_outcome_threashold IS 'Grouping by benchmark.compare_result.compare_id, determines which sql statement (left, right or neither) was fastest considering standard deviation and applying a 10% threashold on upper and lower values.';
CREATE OR REPLACE VIEW benchmark.compare_outcome_threashold AS
WITH final AS (
WITH info AS (
SELECT
comp.description_sql_left,
comp.description_sql_right,
comp.sql_left,
comp.sql_right,
STDDEV(calc.run_time_ms_left)::decimal(18,4) AS standard_deviation_left,
AVG(calc.run_time_ms_left)::decimal(18,4) AS average_left,
MIN(calc.run_time_ms_left)::decimal(18,4) AS min_time_ms_left,
MAX(calc.run_time_ms_left)::decimal(18,4) AS max_time_ms_left,
STDDEV(calc.run_time_ms_right)::decimal(18,4) AS standard_deviation_right,
AVG(calc.run_time_ms_right)::decimal(18,4) AS average_right,
MIN(calc.run_time_ms_right)::decimal(18,4) AS min_time_ms_right,
MAX(calc.run_time_ms_right)::decimal(18,4) AS max_time_ms_right
FROM benchmark.compare AS comp
INNER JOIN benchmark.compare_result AS cres
ON cres.compare_id = comp.compare_id
INNER JOIN benchmark.compare_threashold AS threashold
ON threashold.description_sql_left = comp.description_sql_left
AND threashold.description_sql_right = comp.description_sql_right,
LATERAL (
SELECT (
cres.run_time_sec_left / comp.series_row_count
)*1e6 AS run_time_ms_left,
(
cres.run_time_sec_right / comp.series_row_count
)*1e6 AS run_time_ms_right
) AS calc
WHERE cres.run_time_sec_left
BETWEEN threashold.lower_threashold_left
AND threashold.upper_threashold_left
GROUP BY comp.description_sql_left, comp.description_sql_right,
comp.sql_left, comp.sql_right
)
SELECT
CASE WHEN calc.time_range_in_ms_left && calc.time_range_in_ms_right
THEN 'neither' ELSE
CASE WHEN info.average_left > info.average_right THEN
'right' ELSE 'left'
END
END AS faster_sql,
true as applied_threshold,
info.description_sql_left,
info.sql_left,
calc.time_range_in_ms_left,
info.standard_deviation_left,
info.average_left,
info.min_time_ms_left,
info.max_time_ms_left,
info.sql_right,
info.description_sql_right,
calc.time_range_in_ms_right,
info.standard_deviation_right,
info.average_right,
info.min_time_ms_right,
info.max_time_ms_right
FROM info,
LATERAL (
SELECT
NUMRANGE(
average_left - standard_deviation_left,
average_left + standard_deviation_left
) AS time_range_in_ms_left,
NUMRANGE(
average_right - standard_deviation_right,
average_right + standard_deviation_right
) AS time_range_in_ms_right
) AS calc
)
SELECT
final.*,
CASE WHEN final.faster_sql IN ('right', 'neither') THEN
(final.average_left / NULLIF(final.average_right,0))::decimal(18,2)
ELSE
(final.average_right / NULLIF(final.average_left,0))::decimal(18,2)
END AS times_faster
FROM final;
COMMENT ON VIEW benchmark.compare_outcome_threashold IS 'Grouping by benchmark.compare, determines which sql statement (left, right or neither) was fastest considering standard deviation and applying a 10% threashold on upper and lower values.';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment