jmindek/generate_sql_with_col_names.sql

## generate_sql_with_col_names.sql
/*
* Have tried to generate SQL statements by first retrieving table column names
* from Redshift pg_* tables?
*
* You probably give up due to errors regarding invalid types.
*
* Here is my suggestion for accomplishing this.
*
* First, use the create table statement below to create a temp table with the col_names for our table of interest.
* We must create a table otherwise we get an error that listagg must be used on a user-created table.
*/
DROP TABLE IF EXISTS col_names;

CREATE TEMP TABLE col_names AS
SELECT attrelid::int as tableid, attname::text AS column_name, attnum AS column_num
FROM pg_attribute
WHERE attrelid in
	(SELECT pg_class.oid FROM pg_class INNER JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
		WHERE relname = '<table_with_desired_columns>' AND nspname = '<table_schema>' ) AND
	attname NOT IN ('insertxid', 'deletexid', 'oid', 'tableoid', 'xmin', 'cmin', 'xmax', 'cmax', 'ctid') AND
;

-- Then you can LISTAGG the columns to get a string of the column names.
SELECT LISTAGG(column_name, ', ') FROM col_names;

-- You can do interesting things like ordering your column names:
SELECT LISTAGG(column_name, ', ') FROM col_names group by column_name order by column_name;

-- You can also use this to do pretty advanced SQL generation.
--
-- For example, I want to generate JSON objects for each row per tables.
-- I did not want to hand type the SQL statement for each of the 10+ tables I wanted this for.
-- The following SQL statement gave me the expression that I used in a select statement.
SELECT REGEXP_REPLACE(
	LISTAGG('COALESCE(\\' || '''' || '"' || column_name || '"' || ':"\\'
 || '''' || ' || ' || column_name || ' || CHR(34), \\' || '''' || '"' || column_name || '"' || ':\\' || '''' || ') ' || ' || '','' || \n ' ) within group (order by column_num)
 	,'\',\' \\|\\| \n $'
	,''
 )
FROM col_names
;
-- It outputs:
/*
COALESCE(\'"id":"\' || id || CHR(34), \'"id":\')  || ',' ||
 COALESCE(\'"npi":"\' || npi || CHR(34), \'"npi":\')  || ',' ||
 COALESCE(\'"first_name":"\' || first_name || CHR(34), \'"first_name":\')  || ',' ||
 COALESCE(\'"last_name":"\' || last_name || CHR(34), \'"last_name":\')  || ',' ||
 COALESCE(\'"address_line_1":"\' || address_line_1 || CHR(34), \'"address_line_1":\')  || ',' ||
 COALESCE(\'"address_line_2":"\' || address_line_2 || CHR(34), \'"address_line_2":\')  || ',' ||
 COALESCE(\'"city":"\' || city || CHR(34), \'"city":\')  || ',' ||
 COALESCE(\'"state":"\' || state || CHR(34), \'"state":\')  || ',' ||
 COALESCE(\'"zip_code":"\' || zip_code || CHR(34), \'"zip_code":\')  || ',' ||
 COALESCE(\'"specialty_type":"\' || specialty_type || CHR(34), \'"specialty_type":\')  || ',' ||
 COALESCE(\'"_partition_id":"\' || _partition_id || CHR(34), \'"_partition_id":\')  || ',' ||
 COALESCE(\'"full_name":"\' || full_name || CHR(34), \'"full_name":\')  ||
 */
-- Which I use in a SELECT statement to output each row as a JSON object:
SELECT
id as key,
'{' ||
 COALESCE(\'"id":"\' || id || CHR(34), \'"id":\')  || ',' ||
 COALESCE(\'"npi":"\' || npi || CHR(34), \'"npi":\')  || ',' ||
 COALESCE(\'"first_name":"\' || first_name || CHR(34), \'"first_name":\')  || ',' ||
 COALESCE(\'"last_name":"\' || last_name || CHR(34), \'"last_name":\')  || ',' ||
 COALESCE(\'"address_line_1":"\' || address_line_1 || CHR(34), \'"address_line_1":\')  || ',' ||
 COALESCE(\'"address_line_2":"\' || address_line_2 || CHR(34), \'"address_line_2":\')  || ',' ||
 COALESCE(\'"city":"\' || city || CHR(34), \'"city":\')  || ',' ||
 COALESCE(\'"state":"\' || state || CHR(34), \'"state":\')  || ',' ||
 COALESCE(\'"zip_code":"\' || zip_code || CHR(34), \'"zip_code":\')  || ',' ||
 COALESCE(\'"specialty_type":"\' || specialty_type || CHR(34), \'"specialty_type":\')  || ',' ||
 COALESCE(\'"_partition_id":"\' || _partition_id || CHR(34), \'"_partition_id":\')  || ',' ||
 COALESCE(\'"full_name":"\' || full_name || CHR(34), \'"full_name":\')  ||
'}'
FROM provider
;

-- Note: I am escaping the single quotes because this query is stored in a file which is read into a Python script.
-- Single quotes have special meaning in Python, thus we need to escape them.
	/*
	* Have tried to generate SQL statements by first retrieving table column names
	* from Redshift pg_* tables?
	*
	* You probably give up due to errors regarding invalid types.
	*
	* Here is my suggestion for accomplishing this.
	*
	* First, use the create table statement below to create a temp table with the col_names for our table of interest.
	* We must create a table otherwise we get an error that listagg must be used on a user-created table.
	*/
	DROP TABLE IF EXISTS col_names;

	CREATE TEMP TABLE col_names AS
	SELECT attrelid::int as tableid, attname::text AS column_name, attnum AS column_num
	FROM pg_attribute
	WHERE attrelid in
	(SELECT pg_class.oid FROM pg_class INNER JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
	WHERE relname = '<table_with_desired_columns>' AND nspname = '<table_schema>' ) AND
	attname NOT IN ('insertxid', 'deletexid', 'oid', 'tableoid', 'xmin', 'cmin', 'xmax', 'cmax', 'ctid') AND
	;

	-- Then you can LISTAGG the columns to get a string of the column names.
	SELECT LISTAGG(column_name, ', ') FROM col_names;

	-- You can do interesting things like ordering your column names:
	SELECT LISTAGG(column_name, ', ') FROM col_names group by column_name order by column_name;

	-- You can also use this to do pretty advanced SQL generation.
	--
	-- For example, I want to generate JSON objects for each row per tables.
	-- I did not want to hand type the SQL statement for each of the 10+ tables I wanted this for.
	-- The following SQL statement gave me the expression that I used in a select statement.
	SELECT REGEXP_REPLACE(
	LISTAGG('COALESCE(\\' \|\| '''' \|\| '"' \|\| column_name \|\| '"' \|\| ':"\\'
	\|\| '''' \|\| ' \|\| ' \|\| column_name \|\| ' \|\| CHR(34), \\' \|\| '''' \|\| '"' \|\| column_name \|\| '"' \|\| ':\\' \|\| '''' \|\| ') ' \|\| ' \|\| '','' \|\| \n ' ) within group (order by column_num)
	,'\',\' \\\|\\\| \n $'
	,''
	)
	FROM col_names
	;
	-- It outputs:
	/*
	COALESCE(\'"id":"\' \|\| id \|\| CHR(34), \'"id":\') \|\| ',' \|\|
	COALESCE(\'"npi":"\' \|\| npi \|\| CHR(34), \'"npi":\') \|\| ',' \|\|
	COALESCE(\'"first_name":"\' \|\| first_name \|\| CHR(34), \'"first_name":\') \|\| ',' \|\|
	COALESCE(\'"last_name":"\' \|\| last_name \|\| CHR(34), \'"last_name":\') \|\| ',' \|\|
	COALESCE(\'"address_line_1":"\' \|\| address_line_1 \|\| CHR(34), \'"address_line_1":\') \|\| ',' \|\|
	COALESCE(\'"address_line_2":"\' \|\| address_line_2 \|\| CHR(34), \'"address_line_2":\') \|\| ',' \|\|
	COALESCE(\'"city":"\' \|\| city \|\| CHR(34), \'"city":\') \|\| ',' \|\|
	COALESCE(\'"state":"\' \|\| state \|\| CHR(34), \'"state":\') \|\| ',' \|\|
	COALESCE(\'"zip_code":"\' \|\| zip_code \|\| CHR(34), \'"zip_code":\') \|\| ',' \|\|
	COALESCE(\'"specialty_type":"\' \|\| specialty_type \|\| CHR(34), \'"specialty_type":\') \|\| ',' \|\|
	COALESCE(\'"_partition_id":"\' \|\| _partition_id \|\| CHR(34), \'"_partition_id":\') \|\| ',' \|\|
	COALESCE(\'"full_name":"\' \|\| full_name \|\| CHR(34), \'"full_name":\') \|\|
	*/
	-- Which I use in a SELECT statement to output each row as a JSON object:
	SELECT
	id as key,
	'{' \|\|
	COALESCE(\'"id":"\' \|\| id \|\| CHR(34), \'"id":\') \|\| ',' \|\|
	COALESCE(\'"npi":"\' \|\| npi \|\| CHR(34), \'"npi":\') \|\| ',' \|\|
	COALESCE(\'"first_name":"\' \|\| first_name \|\| CHR(34), \'"first_name":\') \|\| ',' \|\|
	COALESCE(\'"last_name":"\' \|\| last_name \|\| CHR(34), \'"last_name":\') \|\| ',' \|\|
	COALESCE(\'"address_line_1":"\' \|\| address_line_1 \|\| CHR(34), \'"address_line_1":\') \|\| ',' \|\|
	COALESCE(\'"address_line_2":"\' \|\| address_line_2 \|\| CHR(34), \'"address_line_2":\') \|\| ',' \|\|
	COALESCE(\'"city":"\' \|\| city \|\| CHR(34), \'"city":\') \|\| ',' \|\|
	COALESCE(\'"state":"\' \|\| state \|\| CHR(34), \'"state":\') \|\| ',' \|\|
	COALESCE(\'"zip_code":"\' \|\| zip_code \|\| CHR(34), \'"zip_code":\') \|\| ',' \|\|
	COALESCE(\'"specialty_type":"\' \|\| specialty_type \|\| CHR(34), \'"specialty_type":\') \|\| ',' \|\|
	COALESCE(\'"_partition_id":"\' \|\| _partition_id \|\| CHR(34), \'"_partition_id":\') \|\| ',' \|\|
	COALESCE(\'"full_name":"\' \|\| full_name \|\| CHR(34), \'"full_name":\') \|\|
	'}'
	FROM provider
	;

	-- Note: I am escaping the single quotes because this query is stored in a file which is read into a Python script.
	-- Single quotes have special meaning in Python, thus we need to escape them.