@jettify, forked from saiteja09/script.py, created January 24, 2018
Glue job script for reading data from Salesforce via the DataDirect JDBC driver and writing it to S3
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from awsglue.job import Job
args = getResolvedOptions(sys.argv, ['JOB_NAME'])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args['JOB_NAME'], args)

# Read data from Salesforce into a Spark DataFrame using the DataDirect JDBC driver.
# Replace the SecurityToken, user, and password values with your own credentials.
source_df = spark.read.format("jdbc") \
    .option("url", "jdbc:datadirect:sforce://login.salesforce.com;SecurityToken=<token>") \
    .option("dbtable", "SFORCE.OPPORTUNITY") \
    .option("driver", "com.ddtek.jdbc.sforce.SForceDriver") \
    .option("user", "user@mail.com") \
    .option("password", "pass123") \
    .load()

# Convert the Spark DataFrame to an AWS Glue DynamicFrame.
dynamic_dframe = DynamicFrame.fromDF(source_df, glueContext, "dynamic_df")

# Write the DynamicFrame to S3 in CSV format. It could also be written to RDS or
# Redshift through a connection defined previously in Glue; see the sketch after
# the script.
datasink4 = glueContext.write_dynamic_frame.from_options(
    frame=dynamic_dframe,
    connection_type="s3",
    connection_options={"path": "s3://glueuserdata"},
    format="csv",
    transformation_ctx="datasink4",
)
job.commit()
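The write step above targets S3, but as the comment notes, the same DynamicFrame can be written to Redshift through a connection defined in Glue. A minimal sketch of that variant, assuming a Glue catalog connection, database, table, and S3 staging path that are all hypothetical names, not part of the original gist:

# Sketch: write the same DynamicFrame to Redshift via a Glue catalog connection.
# "redshift-conn", "dev", "opportunity", and the staging path are hypothetical.
datasink5 = glueContext.write_dynamic_frame.from_jdbc_conf(
    frame=dynamic_dframe,
    catalog_connection="redshift-conn",
    connection_options={"dbtable": "opportunity", "database": "dev"},
    redshift_tmp_dir="s3://glueuserdata/temp/",  # staging area Glue uses for the COPY
    transformation_ctx="datasink5",
)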
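The script hard-codes placeholder credentials; Glue job parameters keep them out of the source. A minimal sketch, assuming the job is started with --sf_user, --sf_password, and --sf_token arguments (hypothetical parameter names, not part of the original gist):

# Sketch: pull Salesforce credentials from Glue job arguments instead of
# hard-coding them. The sf_* parameter names are hypothetical.
args = getResolvedOptions(sys.argv, ['JOB_NAME', 'sf_user', 'sf_password', 'sf_token'])

source_df = spark.read.format("jdbc") \
    .option("url", "jdbc:datadirect:sforce://login.salesforce.com;SecurityToken=" + args['sf_token']) \
    .option("dbtable", "SFORCE.OPPORTUNITY") \
    .option("driver", "com.ddtek.jdbc.sforce.SForceDriver") \
    .option("user", args['sf_user']) \
    .option("password", args['sf_password']) \
    .load()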