Skip to content

Instantly share code, notes, and snippets.

@taiwotman
Last active December 18, 2020 00:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save taiwotman/096b51e6fbc702f741c6e4bc388ffb10 to your computer and use it in GitHub Desktop.
Save taiwotman/096b51e6fbc702f741c6e4bc388ffb10 to your computer and use it in GitHub Desktop.
from pyspark.sql import SparkSession
from pyspark.sql.types import DoubleType
from pyspark.sql.functions import col
# Jar handed to Spark below; presumably the Redshift JDBC driver -- confirm.
jar_path = 'rds_jar_driver.jar'

# Build (or reuse) the session, registering the jar both through
# `spark.jars` and on the driver's extra classpath.
spark = (
    SparkSession.builder
    .appName("AWS REDSHIFT PYSPARK APP")
    .config("spark.jars", jar_path)
    .config('spark.driver.extraClassPath', jar_path)
    .getOrCreate()
)
# Connection settings. The URL carries the user and password as query
# parameters; the values look like placeholders to be replaced.
# NOTE(review): credentials are embedded in the URL string -- confirm this is
# acceptable for the target environment.
JDBC_URL = 'jdbc:redshift://your_redshift_cluster.us-east-1.redshift.amazonaws.com:5439/dev_redshift?user=awsuser&password=yourpassword'
TEMP_DIR = 's3://somebucket/tempdir/'
TABLE_NAME = 'seattle_emergence_calls'

# Read TABLE_NAME into a DataFrame through the "jdbc" data source.
# NOTE(review): "Tempdir" and "forward_spark_s3_credentials" look like options
# of the spark-redshift connector rather than of the built-in "jdbc" source --
# confirm they have any effect with this format.
df = (
    spark.read.format("jdbc")
    .option("url", JDBC_URL)
    .option("Tempdir", TEMP_DIR)
    .option("dbtable", TABLE_NAME)
    .option("forward_spark_s3_credentials", "true")
    .load()
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment