Skip to content

Instantly share code, notes, and snippets.

@enkeboll
Created June 27, 2018 17:11
Show Gist options
  • Save enkeboll/aa9ade35d807627253b5aa21fdfbb904 to your computer and use it in GitHub Desktop.
Save enkeboll/aa9ade35d807627253b5aa21fdfbb904 to your computer and use it in GitHub Desktop.
Useful redshift quieries
redshift
-- COMMAND LINE CONNECT:
- seatgeek:
- psql "host=redshift-datawarehouse.service.seatgeek.prod dbname=sganalytic user=andy port=5439"
- greenhouse:
- psql "host=redshift.greenhouse.io dbname=greenhouse user=org_978 port=5439 sslmode=require"
-- CREDENTIALS LINE
CREDENTIALS 'aws_iam_role=arn:aws:iam::093535234988:role/Production-RedshiftCopyUnload'
-- STL_LOAD_ERRORS query
select le.starttime, le.filename, d.query,
d.line_number as line, d.value,
le.err_reason, le.colname, trim(le.raw_line), trim(raw_field_value)
from stl_loaderror_detail d
join stl_load_errors le using (query)
order by starttime DESC
limit 100;
-- SEE RUNNING QUERIES AND KILL
select pid,
user_name,
starttime,
datediff(minute, starttime, getdate()) as elapsed_minutes,
trim(query) as query
from stv_recents
where status='Running'
order by 4 desc;
cancel XXXX;
-- FIND LOCKS
select relname, locks.pid, locks."mode", ses.user_name, ses.starttime, datediff(minute, ses.starttime, getdate()) as elapsed_minutes
from PG_LOCKS locks
join PG_CLASS cls on locks.relation = cls.oid
left join STV_SESSIONS ses on locks.pid = ses.process
-- where relname = 'actions_fat'
order by starttime asc;
select * from STL_QUERY where pid = XXXX;
pg_terminate_backend(XXXX);
-- get query text for transaction ids:
SELECT trim(querytxt) AS query FROM stl_query WHERE xid = 'txn_id';
-- HAS_TABLE_PRIVILEGE
select has_table_privilege('username', 'private.seatgeek_transactions', 'select');
-- GET ALL GRANTS ON TABLES FOR ALL USERS
WITH v_get_obj_priv_by_user AS (
SELECT
*
FROM
(
SELECT
schemaname
,objectname
,usename
,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'select') AS sel
,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'insert') AS ins
,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'update') AS upd
,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'delete') AS del
,HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'references') AS ref
FROM
(
SELECT schemaname, 't' AS obj_type, tablename AS objectname, QUOTE_IDENT(schemaname) || '.' || QUOTE_IDENT(tablename) AS fullobj FROM pg_tables
WHERE schemaname not in ('pg_internal')
UNION
SELECT schemaname, 'v' AS obj_type, viewname AS objectname, QUOTE_IDENT(schemaname) || '.' || QUOTE_IDENT(viewname) AS fullobj FROM pg_views
WHERE schemaname not in ('pg_internal')
) AS objs
,(SELECT * FROM pg_user) AS usrs
ORDER BY fullobj
)
WHERE (sel = true or ins = true or upd = true or del = true or ref = true)
)
SELECT
schemaname
,objectname
,usename
,REVERSE(SUBSTRING(REVERSE(CASE WHEN sel IS TRUE THEN 'GRANT SELECT ON ' + QUOTE_IDENT(schemaname) + '.' + QUOTE_IDENT(objectname) + ' TO ' + usename + ';\n' ELSE '' END +
CASE WHEN ins IS TRUE THEN 'GRANT INSERT ON ' + QUOTE_IDENT(schemaname) + '.' + QUOTE_IDENT(objectname) + ' TO ' + usename + ';\n' ELSE '' END +
CASE WHEN upd IS TRUE THEN 'GRANT UPDATE ON ' + QUOTE_IDENT(schemaname) + '.' + QUOTE_IDENT(objectname) + ' TO ' + usename + ';\n' ELSE '' END +
CASE WHEN del IS TRUE THEN 'GRANT DELETE ON ' + QUOTE_IDENT(schemaname) + '.' + QUOTE_IDENT(objectname) + ' TO ' + usename + ';\n' ELSE '' END +
CASE WHEN ref IS TRUE THEN 'GRANT REFERENCES ON ' + QUOTE_IDENT(schemaname) + '.' + QUOTE_IDENT(objectname) + ' TO ' + usename + ';\n' ELSE '' END), 2)) AS ddl
FROM v_get_obj_priv_by_user
-- VIEW ALL USERS
select * from pg_user;
-- VIEW USERS WHO HAVE QUERIED FOR THE WORD "private"
select query, usename, userid, trim(querytxt) as sqlquery
from stl_query q
left join pg_user u on q.userid = u.usesysid
where userid not in (1, 100, 103)
and lower(querytxt) like '%private%'
and starttime >= dateadd(month,-1,CURRENT_DATE)
order by 2;
-- GET SKEWED TABLES INTO TEMP TABLE
DROP TABLE IF EXISTS temp_staging_tables_1;
DROP TABLE IF EXISTS temp_staging_tables_2;
DROP TABLE IF EXISTS temp_tables_report;
CREATE TEMP TABLE temp_staging_tables_1
(schemaname TEXT,
tablename TEXT,
tableid BIGINT,
size_in_megabytes BIGINT);
INSERT INTO temp_staging_tables_1
SELECT n.nspname, c.relname, c.oid,
(SELECT COUNT(*) FROM STV_BLOCKLIST b WHERE b.tbl = c.oid)
FROM pg_namespace n, pg_class c
WHERE n.oid = c.relnamespace
AND nspname NOT IN ('pg_catalog', 'pg_toast', 'information_schema','pg_internal')
AND c.relname <> 'temp_staging_tables_1';
CREATE TEMP TABLE temp_staging_tables_2
(tableid BIGINT,
min_blocks_per_slice BIGINT,
max_blocks_per_slice BIGINT,
slice_count BIGINT);
INSERT INTO temp_staging_tables_2
SELECT tableid, MIN(c), MAX(c), COUNT(DISTINCT slice)
FROM (SELECT t.tableid, slice, COUNT(*) AS c
FROM temp_staging_tables_1 t, STV_BLOCKLIST b
WHERE t.tableid = b.tbl
GROUP BY t.tableid, slice)
GROUP BY tableid;
CREATE TEMP TABLE temp_tables_report
(schemaname TEXT,
tablename TEXT,
tableid BIGINT,
size_in_mb BIGINT,
has_dist_key INT,
has_sort_key INT,
has_col_encoding INT,
pct_skew_across_slices FLOAT,
pct_slices_populated FLOAT);
INSERT INTO temp_tables_report
SELECT t1.*,
CASE WHEN EXISTS (SELECT *
FROM pg_attribute a
WHERE t1.tableid = a.attrelid
AND a.attnum > 0
AND NOT a.attisdropped
AND a.attisdistkey = 't')
THEN 1 ELSE 0 END,
CASE WHEN EXISTS (SELECT *
FROM pg_attribute a
WHERE t1.tableid = a.attrelid
AND a.attnum > 0
AND NOT a.attisdropped
AND a.attsortkeyord > 0)
THEN 1 ELSE 0 END,
CASE WHEN EXISTS (SELECT *
FROM pg_attribute a
WHERE t1.tableid = a.attrelid
AND a.attnum > 0
AND NOT a.attisdropped
AND a.attencodingtype <> 0)
THEN 1 ELSE 0 END,
100 * CAST(t2.max_blocks_per_slice - t2.min_blocks_per_slice AS FLOAT)
/ CASE WHEN (t2.min_blocks_per_slice = 0)
THEN 1 ELSE t2.min_blocks_per_slice END,
CAST(100 * t2.slice_count AS FLOAT) / (SELECT COUNT(*) FROM STV_SLICES)
FROM temp_staging_tables_1 t1, temp_staging_tables_2 t2
WHERE t1.tableid = t2.tableid;
SELECT *, pct_skew_across_slices + (100 - pct_slices_populated) as comb FROM temp_tables_report
ORDER BY comb desc;
--GET ALL UDF SIGNATURES
select proname, proargnames, proargtypes, prorettype::regtype
from pg_proc
where proowner != 1 order by 1;
--GET UDF SOURCE FROM A NAME (from psql)
\df+ f_udf_name
-- VIEW ALL GROUPS A USER BELONGS TO
select groname as group_name
from pg_user
left join pg_group on usesysid = any(grolist)
where usename = 'USERNAME';
-- VIEW ALL USERS IN ALL GROUPS
select groname, usename
from pg_group
left join pg_user on pg_user.usesysid = any(grolist)
order by 1, 2
-- GET ALL SCHEMA GRANTS FOR A SPECIFIC USER
select nspname, array_to_string(nspacl, '|') ns from pg_namespace where array_to_string(nspacl, '|') like '%ross%' order by 1;
--GET ALL TABLE GRANTS FOR A SPECIFIC USER
WITH
usrs as (SELECT * FROM pg_user),
objs as (
SELECT
schemaname, 't' AS obj_type,
tablename AS objectname,
schemaname + '.' + tablename AS fullobj
FROM pg_tables
WHERE schemaname not in ('pg_internal')
UNION
SELECT
schemaname, 'v' AS obj_type,
viewname AS objectname,
schemaname + '.' + viewname AS fullobj
FROM pg_views
WHERE schemaname NOT IN ('pg_internal')
),
query as (
SELECT
schemaname,
objectname,
usename,
HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'select') AS sel,
HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'insert') AS ins,
HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'update') AS upd,
HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'delete') AS del,
HAS_TABLE_PRIVILEGE(usrs.usename, fullobj, 'references') AS ref
FROM objs, usrs
ORDER BY fullobj
)
SELECT * FROM query
WHERE (
sel = TRUE
OR ins = TRUE
OR upd = TRUE
OR del = TRUE
OR ref = TRUE
)
AND schemaname not in ('pg_catalog', 'information_schema')
AND usename = 'samsh_readonly';
-- REVOKE EVERYTHING IN A SCHEMA
REVOKE ALL ON SCHEMA xxxx FROM xxxx;
REVOKE USAGE ON SCHEMA xxxx FROM xxxx;
REVOKE ALL ON ALL TABLES IN SCHEMA xxxx FROM xxxx;
-- PIPELINE REFRESH
drop table sellerdirect_orders;
delete from pipeline_table_index where datastore_name = 'sellerdirect' and "table_name" = 'orders';
$BIN_PATH/python $APP_PATH/enterprise/rds_pipeline/pipeline.py -ds sellerdirect -t orders
-- RELIABLE TABLE UNLOAD / COPY
UNLOAD ('SELECT * FROM tbl_example') TO 's3://s3bucket/tbl_example' CREDENTIALS 'aws_iam_role=arn:aws:iam::093535234988:role/Production-RedshiftCopyUnload'
DELIMITER '|' MANIFEST ADDQUOTES ESCAPE ALLOWOVERWRITE;
COPY tbl_example2 FROM 's3://s3bucket/tbl_example' CREDENTIALS 'aws_iam_role=arn:aws:iam::093535234988:role/Production-RedshiftCopyUnload' DELIMITER '|' MANIFEST REMOVEQUOTES ESCAPE;
-- WHEN HAVE USERS ACCESSED THE DW
select u.usename as username,
min(recordtime) as first_login,
max(recordtime) as last_login,
count(recordtime) as successful_connections
from pg_user u
left join STL_CONNECTION_LOG cl on cl.username = u.usename and event = 'authenticated'
group by 1
order by 1;
-- LARGEST TABLES IN WAREHOUSE
SELECT TRIM(pgdb.datname) AS Database,
TRIM(a.name) AS Table,
TRIM(pn.nspname) AS Schema,
((b.mbytes/part.total::decimal)*100)::decimal(5,2) AS pct_of_total,
b.mbytes,
b.unsorted_mbytes
FROM stv_tbl_perm a
JOIN pg_database AS pgdb
ON pgdb.oid = a.db_id
JOIN pg_class pc
ON pc.oid = a.id
JOIN pg_namespace pn
ON pn.oid = relnamespace
JOIN ( SELECT tbl,
SUM( DECODE(unsorted, 1, 1, 0)) AS unsorted_mbytes,
COUNT(*) AS mbytes
FROM stv_blocklist
GROUP BY tbl ) AS b
ON a.id = b.tbl
JOIN ( SELECT SUM(capacity) AS total
FROM stv_partitions
WHERE part_begin = 0 ) AS part
ON 1 = 1
WHERE a.slice = 0
ORDER BY 5 desc, db_id, name
-- COPY COMMAND PERFORMANCE
select q.starttime, s.query, substring(q.querytxt,1,120) as querytxt,
s.n_files, size_mb, s.time_seconds,
s.size_mb/decode(s.time_seconds,0,1,s.time_seconds) as mb_per_s
from (select query, count(*) as n_files,
sum(transfer_size/(1024*1024)) as size_MB, (max(end_Time) -
min(start_Time))/(1000000) as time_seconds , max(end_time) as end_time
from stl_s3client where http_method = 'GET' and query > 0
and transfer_time > 0 group by query ) as s
LEFT JOIN stl_Query as q on q.query = s.query
where s.end_Time >= dateadd(day, -7, current_Date)
order by s.time_Seconds desc, size_mb desc, s.end_time desc
limit 50;
-- VIEW USERS' SSL CONNECTIONS
select username, max(recordtime), sslversion, sslcipher
from STL_CONNECTION_LOG
where recordtime > getdate() - interval '7 days'
group by 1, 3, 4
order by 1, 2 desc
-- GENERATE DDL SCRIPT
select ddl
from admin_views.v_generate_tbl_ddl
where schemaname = 'public'
and tablename = 'TABLENAME'
order by seq;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment