Skip to content

Instantly share code, notes, and snippets.

@nelsonsequiera
Last active December 31, 2020 06:04
Show Gist options
  • Save nelsonsequiera/0e9660e40f973cbd571b06f23e725d87 to your computer and use it in GitHub Desktop.
Save nelsonsequiera/0e9660e40f973cbd571b06f23e725d87 to your computer and use it in GitHub Desktop.
Redshift usefull Queries
Redshift usefull queries
WITH base_skew AS (
SELECT id AS tbl
FROM stv_tbl_perm
GROUP BY id
HAVING SUM(ROWS) > (
SELECT 1000000 * (MAX(node) + 1)
FROM stv_slices
)
AND (MAX(ROWS) / (MIN(ROWS) + 1.0)) >= 2.0
AND SUM(ROWS) <> (
MAX(ROWS) * (
SELECT MAX(node) + 1
FROM stv_slices
)
)
),
table_slices AS (
SELECT t.slice,
t.id,
COUNT(b.blocknum) s_mb,
COUNT(DISTINCT col) * COUNT(DISTINCT unsorted) ss_mb
FROM stv_tbl_perm t
LEFT JOIN stv_blocklist b ON (
t.id = b.tbl
AND t.slice = b.slice
AND b.temporary = 0
AND b.tombstone = 0
)
WHERE t.slice < 6400
AND t.temp = 0
AND t.id IN (
SELECT tbl
FROM base_skew
)
GROUP BY t.id,
t.slice
),
skew_dist AS (
SELECT slice,
id AS tbl,
SUM(s_mb) AS total_size_mb,
MAX(ss_mb) AS slice_size_mb,
SUM(s_mb)::float - AVG(SUM(s_mb)::float) OVER (PARTITION BY tbl) AS dist_from_mean
FROM table_slices
GROUP BY tbl,
slice
),
skew_degree AS (
SELECT tbl,
SUM(total_size_mb) table_size_mb,
MIN(total_size_mb) min_slice_mb,
MAX(total_size_mb) max_slice_mb,
MAX(slice_size_mb) slice_size_mb,
COALESCE(
SUM(dist_from_mean ^ 2)::float / NULLIF(SUM(total_size_mb ^ 2)::float, 0),
0
) AS slice_skew_degree
FROM skew_dist
GROUP BY tbl
)
SELECT schema || '.' || "table" AS tablename,
diststyle,
ROUND(slice_skew_degree, 5) AS slice_skew_degree,
min_slice_mb,
max_slice_mb,
table_size_mb,
skew_rows,
CASE
WHEN max_slice_mb > slice_size_mb * 10 THEN '*'
ELSE ''
END AS advisor_flagged
FROM skew_degree
JOIN svv_table_info ti ON ti.table_id = tbl
WHERE slice_skew_degree >= 0.50
ORDER BY advisor_flagged DESC,
slice_skew_degree DESC;
SELECT "column",
TYPE,
distkey,
sortkey
FROM pg_table_def
WHERE schemaname = 'public'
AND tablename = <tablename>
AND (
distkey = TRUE
OR sortkey <> 0
);
SELECT ti.schema,
ti.table tablename,
raw_size.size uncompressed_mb,
ti.size total_mb
FROM svv_table_info ti
LEFT JOIN (
SELECT tbl table_id,
COUNT(*) size
FROM stv_blocklist
WHERE (tbl, col) IN (
SELECT attrelid,
attnum -1
FROM pg_attribute
WHERE attencodingtype IN (0, 128)
AND attnum > 0
AND attsortkeyord != 1
)
GROUP BY tbl
) raw_size USING (table_id)
WHERE raw_size.size IS NOT NULL
ORDER BY raw_size.size DESC;
select TABLE_NAME from INFORMATION_SCHEMA.COLUMNS where COLUMN_NAME like '%<col>%' ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment