Redshift Tips
# List all tables:
select db_id, id, name, sum(rows) as mysum
from stv_tbl_perm where db_id = 100546
group by db_id, id, name order by mysum desc;
# list all running processes:
select pid, query from stv_recents where status = 'Running';
# describe table
select * from PG_TABLE_DEF where tablename='audit_trail';
select * from pg_tables where schemaname = 'public'
# Disk space used:
select sum(used-tossed) as used, sum(capacity) as capacity from stv_partitions
# Query log
select query, starttime , substring from svl_qlog where substring like '%tbl_name%'
order by starttime desc limit 50;
# command history
select * from stl_ddltext where text like '%ox_data_summary_hourly_depot%' limit 10
# last load errors
select starttime, filename, err_reason from stl_load_errors order by starttime desc limit 100
select filename, count(*) as cnt from stl_load_errors group by filename
# create table from another table
select * into newevent from event;
# Check how columns are compressed
ANALYZE COMPRESSION tbl_name;
# ANALYZE and VACUUM
If you insert, update, or delete a significant number of rows in a table, run the ANALYZE and VACUUM commands against the table.
"analyze compression tbl_name" command produce a report with the suggested column encoding.
# To find and diagnose load errors for table 'event'
create view loadview as
(select distinct tbl, trim(name) as table_name, query, starttime,
trim(filename) as input, line_number, field, err_code,
trim(err_reason) as reason
from stl_load_errors sl, stv_tbl_perm sp
where sl.tbl = sp.id);
select * from loadview where table_name='event';
# Query to find blocks used
select stv_tbl_perm.name, count(*)
from stv_blocklist, stv_tbl_perm
where stv_blocklist.tbl = stv_tbl_perm.id
and stv_blocklist.slice = stv_tbl_perm.slice
group by stv_tbl_perm.name
order by stv_tbl_perm.name;
Load tips:
# While loading data you can specify "empty as null", "blanks as null", allow up to 5 errors ("max error 5"), "ignore blank lines", "remove quotes", and read gzipped input. Use the keywords: emptyasnull blanksasnull maxerror 5 ignoreblanklines removequotes gzip (see the COPY sketch after this list)
# use NULL AS '\000' to fix the import from specific files
# use BLANKASNULL in the original COPY statement so that no empty strings are loaded into VARCHAR fields which might ultimately be converted to numeric fields.
# Use the NOLOAD keyword with a COPY command to validate the data in the input files before actually loading the data.
# use COMPUPDATE to enable automatic compression
# FILLRECORD to fill missing columns at the end with blanks or NULLs
# TRIMBLANKS Removes the trailing whitespace characters from a VARCHAR string.
# ESCAPE the backslash character (\) in input data is treated as an escape character. (useful for delimiters and embedded newlines)
# ROUNDEC rounds numeric values while loading; e.g. 20.259 loaded into a DECIMAL(8,2) column becomes 20.26, whereas without it the value is truncated to 20.25
# TRUNCATECOLUMNS Truncates data in columns to fit the column width
# IGNOREHEADER 1 to skip the first (header) row
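A hedged COPY sketch combining several of the options above; the bucket path and IAM role are placeholders:
copy tbl_name
from 's3://mybucket/data/part'
credentials 'aws_iam_role=arn:aws:iam::123456789012:role/MyRedshiftRole'
gzip removequotes emptyasnull blanksasnull
maxerror 5 ignoreblanklines ignoreheader 1
trimblanks truncatecolumns fillrecord roundec;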
_____
If you are using JDBC, try adding the keepalive option to your connection string, e.g.:
jdbc:postgresql://instance.amazonaws.com:8192/database?tcpkeepalive=true
You can have AUTOCOMMIT set in your Workbench client.
_____
To avoid timeout errors while using Workbench on Windows, use the following registry settings:
HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\KeepAliveTime 30000
HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\KeepAliveInterval 1000
HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\TcpMaxDataRetransmission 10
_____
# Consider using DISTKEY and SORTKEY - a table can have multiple sort key columns but only one distribution key (see the sketch after this list)
# wlm_query_slot_count - increasing this sets aside more memory for a query, which may keep operations from spilling to disk
# the isolation level for Redshift is SERIALIZABLE
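A minimal sketch of both tips, using hypothetical table and column names:
create table sales (
    sale_id bigint,
    customer_id bigint,
    sale_date date,
    amount decimal(12,2)
)
distkey (customer_id)
sortkey (sale_date, customer_id);
set wlm_query_slot_count to 3; -- give the next queries in this session more memory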
_____
// There is no equivalent of "show create table tbl_name";
// query the PG_TABLE_DEF table to gather all the necessary schema information, e.g.:
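-- assumes the table's schema is on the search_path
select "column", type, encoding, distkey, sortkey
from pg_table_def
where tablename = 'tbl_name';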
// convert to and from unixtime
select extract (epoch from timestamp '2011-08-08 11:11:58');
select TIMESTAMP 'epoch' + starttime * INTERVAL '1 second' as starting from tbl_name;
// Update a joined table:
update abcd
set ser_area_code=abcd_update.ser_area_code,
preferences=abcd_update.preferences, opstype=abcd_update.opstype,
phone_type=abcd_update.phone_type
from abcd_update join abcd nc on nc.phone_number = abcd_update.phone_number
http://docs.aws.amazon.com/redshift/latest/dg/t_updating-inserting-using-staging-tables-.html#concept_upsert
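A hedged upsert sketch following the staging-table pattern from the link above; abcd_staging is a hypothetical staging table already loaded with the new rows:
begin;
delete from abcd using abcd_staging
where abcd.phone_number = abcd_staging.phone_number;
insert into abcd select * from abcd_staging;
end;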
_____
// install postgresql
yum install postgresql postgresql-server
chkconfig postgresql on
// Create a file to store the Redshift password:
vi ~/.pgpass
c.us-east-1.redshift.amazonaws.com:5439:mydb:root:Passwd
chmod 0600 ~/.pgpass
// load data to redshift
cat to_psql.txt | psql -hc.us-east-1.redshift.amazonaws.com -Uroot -p5439 mydb > to_save.csv
// send the file as an attachment
echo "report file attached. " | mutt -s "result data " -a to_save.csv -- some_address@gmail.com
// mysqldump command that will generate the required statements to be used in redshift
mysqldump db_name tbl_name --where='1=1 limit 10' --compact --no-create-info --skip-quote-names > to_psql.txt
_____
Amazon Redshift data types are different from those of MySQL. For example, text can be stored only as a VARCHAR type, up to 65535 bytes.
http://docs.aws.amazon.com/redshift/latest/dg/r_Character_types.html
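A minimal illustration with a hypothetical table: a MySQL TEXT column has to become a wide VARCHAR in Redshift.
create table notes (id bigint, body varchar(65535));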
Here is a script that will do this conversion automatically.
https://gist.github.com/shantanuo/5115366
_____
If postgresql client is installed, we can connect to redshift using something like this...
# PGPASSWORD=Fly8946392085 psql -U fsb_user_85_22719249 -h flydata-sandbox-cluster.clroanynhqjo.us-east-1.redshift.amazonaws.com -p 5439 -d flydatasandboxdb
Welcome to psql 8.1.23 (server 8.0.2), the PostgreSQL interactive terminal.
_____
## script that will display 10 rows from each table
#!/bin/sh
echo "select name from stv_tbl_perm where db_id = 100546 group by name ;" | psql -hkalc.us-east-1.redshift.amazonaws.com -Uroot -p5439 mydb > /root/psql.txt 2>> /root/psql_err.txt
for tbl_name in `cat /root/psql.txt`
do
echo "$tbl_name" >> /root/psql_limit.txt 2>> /root/psql_limit_err.txt
echo "select * from $tbl_name limit 10 ; " | psql -hkalc.us-east-1.redshift.amazonaws.com -Uroot -p5439 mydb >> /var/www/psql_limit.txt 2>> /root/psql_limit_err.txt
echo "====================================="
done
_____
The following statement queries the STV_LOCKS table to view all locks in effect for current transactions:
select table_id, last_update, lock_owner, lock_owner_pid, lock_status
from stv_locks;
table_id | last_update | lock_owner | lock_owner_pid | lock_status
----------+----------------------------+------------+----------------+------------------------
100295 | 2014-01-06 23:50:56.290917 | 95402 | 7723 | Holding write lock
100304 | 2014-01-06 23:50:57.408457 | 95402 | 7723 | Holding write lock
100304 | 2014-01-06 23:50:57.409986 | 95402 | 7723 | Holding insert lock
(3 rows)
The following statement terminates the session holding the locks:
select pg_terminate_backend(7723);
-- Credit: http://oksoft.blogspot.com/2013/03/redshift-tips.html