Skip to content

Instantly share code, notes, and snippets.

@pvgomes
Created January 23, 2019 16:16
Show Gist options
  • Save pvgomes/c3eb4b68505d1d60304dfe968a734434 to your computer and use it in GitHub Desktop.
Save pvgomes/c3eb4b68505d1d60304dfe968a734434 to your computer and use it in GitHub Desktop.
Load data from S3 to Redshift using Python3
############ REQUIREMENTS TO INSTALL ####################
# THIS SCRIPT use category table from AWS sample data (https://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-create-sample-db.html)
# we use this sample data s3://awssampledbuswest2/tickit/category_pipe.txt
###### Linux users
#`sudo apt-get install libpq-dev`
###### Mac users
#`brew install libpq`
###### ALL USERS
#pip3 install psycopg2-binary
#pip3 install sqlalchemy
#pip3 install sqlalchemy-redshift
###########################################################
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
#>>>>>>>> MAKE CHANGES HERE <<<<<<<<<<<<<
DATABASE = "dwtest"
USER = "youruser"
PASSWORD = "yourpassword"
HOST = "dwtest.awsexample.com"
PORT = "5439"
SCHEMA = "public"
S3_FULL_PATH = 's3://yourbucket/category_pipe.txt'
ARN_CREDENTIALS = 'arn:aws:iam::YOURARN:YOURROLE'
REGION = 'us-east-1'
############ CONNECTING AND CREATING SESSIONS ############
connection_string = "redshift+psycopg2://%s:%s@%s:%s/%s" % (USER,PASSWORD,HOST,str(PORT),DATABASE)
engine = sa.create_engine(connection_string)
session = sessionmaker()
session.configure(bind=engine)
s = session()
SetPath = "SET search_path TO %s" % SCHEMA
s.execute(SetPath)
###########################################################
############ RUNNING COPY ############
copy_command = '''
copy category from '%s'
credentials 'aws_iam_role=%s'
delimiter '|' region '%s';
''' % (S3_FULL_PATH, ARN_CREDENTIALS, REGION)
s.execute(copy_command)
s.commit()
######################################
############ GETTING DATA ############
query = "SELECT * FROM category;"
rr = s.execute(query)
all_results = rr.fetchall()
def pretty(all_results):
for row in all_results :
print("row start >>>>>>>>>>>>>>>>>>>>")
for r in row :
print(" ---- %s" % r)
print("row end >>>>>>>>>>>>>>>>>>>>>>")
pretty(all_results)
s.close()
######################################
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment