Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Quick and dirty Python script to fetch a Redshift Spectrum's create table statement
# Copyright (c) 2017 Ikai Lan
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# To use this script:
# pip install psychopg2
from __future__ import print_function
import psycopg2
PG_PASSWORD = 'PASSWORD GOES HERE'
PG_HOST = 'REDSHIFT HOST GOES HERE'
PG_PORT = 5439
PG_USER = 'USERNAME'
SCHEMA_NAME = 'YOUR SPECTRUM SCHEMA NAME GOES HERE'
TABLE_NAME = 'YOUR TABLE NAME HERE'
TABLE_SCHEMA_QUERY = """
SELECT columnname, external_type
FROM svv_external_columns
WHERE schemaname=(%s) AND tablename=(%s)
ORDER BY columnnum ASC"""
TABLE_LOCATION_QUERY = """
SELECT location
FROM svv_external_tables
WHERE schemaname=(%s) AND tablename=(%s)
LIMIT 1
"""
def main():
conn = psycopg2.connect(dbname='dev',
host=PG_HOST,
port=PG_PORT,
user=PG_USER,
password=PG_PASSWORD)
table_name = TABLE_NAME
cur = conn.cursor()
cur.execute(TABLE_SCHEMA_QUERY, (SCHEMA_NAME, table_name,))
schema = cur.fetchall()
cur.close()
cur = conn.cursor()
cur.execute(TABLE_LOCATION_QUERY, (SCHEMA_NAME, table_name,))
location = cur.fetchone()
create_table_lines = []
create_table_lines.append(
'CREATE EXTERNAL TABLE {schemaname}.{tablename}('.format(
schemaname=SCHEMA_NAME, tablename=table_name))
column_lines = []
for column, column_type in schema:
column_lines.append(
'{column} {column_type}'.format(column=column, column_type=column_type))
columns = ',\n'.join(column_lines) + ')'
create_table_lines.append(columns)
# TODO read the format from svv_external_tables.input_format and branch on that code
create_table_lines.append('STORED AS parquet')
create_table_lines.append("LOCATION '{location}'".format(location=location[0]))
create_table_statement = '\n'.join(create_table_lines) + ';'
print(create_table_statement)
cur.close()
conn.close()
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.