Skip to content

Instantly share code, notes, and snippets.

@karol-blaszczyk
Created November 4, 2019 16:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save karol-blaszczyk/3f5edc49c88a4e88bb296481972d927f to your computer and use it in GitHub Desktop.
Save karol-blaszczyk/3f5edc49c88a4e88bb296481972d927f to your computer and use it in GitHub Desktop.
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
## @params: [JOB_NAME]
# Resolve the job arguments from the command line; only JOB_NAME is requested.
args = getResolvedOptions(sys.argv, ["JOB_NAME"])

# Build the Spark/Glue contexts and initialise the job with its arguments.
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
job.init(args["JOB_NAME"], args)

# Every column keeps both its name and its type, so each mapping tuple is
# (name, type, name, type); derive them from a single (name, type) list.
report_columns = [
    ("campaign id", "long"),
    ("campaign", "string"),
    ("clicks", "long"),
    ("cost", "long"),
    ("day", "string"),
    ("impressions", "long"),
    ("views", "long"),
]
column_mappings = [
    (column, column_type, column, column_type)
    for column, column_type in report_columns
]

# Target of the JDBC write: table "reports" in database "glue_tutorial".
jdbc_target = {"dbtable": "reports", "database": "glue_tutorial"}

# Source: table "performance_reports" in the catalog database "reports".
datasource0 = glueContext.create_dynamic_frame.from_catalog(
    database="reports",
    table_name="performance_reports",
    transformation_ctx="datasource0",
)

# Transform chain: apply the column mappings, resolve choice types with the
# "make_cols" strategy, then run DropNullFields on the result.
applymapping1 = ApplyMapping.apply(
    frame=datasource0,
    mappings=column_mappings,
    transformation_ctx="applymapping1",
)
resolvechoice2 = ResolveChoice.apply(
    frame=applymapping1,
    choice="make_cols",
    transformation_ctx="resolvechoice2",
)
dropnullfields3 = DropNullFields.apply(
    frame=resolvechoice2,
    transformation_ctx="dropnullfields3",
)

# Sink: write the final frame through the "ReportsDB" catalog connection.
datasink4 = glueContext.write_dynamic_frame.from_jdbc_conf(
    frame=dropnullfields3,
    catalog_connection="ReportsDB",
    connection_options=jdbc_target,
    transformation_ctx="datasink4",
)

job.commit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment