trungpv1601/Test partition job.py

## Test partition job.py
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from datetime import datetime

# JOB_NAME is the only argument this script resolves from the command line.
args = getResolvedOptions(sys.argv, ["JOB_NAME"])

# Build the Spark context, wrap it in a Glue context, and keep a handle on
# the Spark session exposed by that Glue context.
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session

# Create the Glue job object and initialise it with the resolved arguments.
job = Job(glueContext)
job.init(args["JOB_NAME"], args)

# Source node (Amazon S3): load the JSON input under s3://xxx/a/ through the
# Glue context's create_dynamic_frame reader.
AmazonS3_node1702009815377 = glueContext.create_dynamic_frame.from_options(
    connection_type="s3",
    connection_options={"paths": ["s3://xxx/a/"], "recurse": True},
    format="json",
    format_options={"multiline": False},
    transformation_ctx="AmazonS3_node1702009815377",
)

def AddDate(rec):
    """Add UTC ``year``/``month``/``day`` string fields to a record.

    Args:
        rec: Mapping-like record holding a ``creation_timestamp`` value that
            ``int()`` can convert. Presumably whole seconds since the Unix
            epoch -- confirm against the producer of the data.

    Returns:
        The same ``rec`` object, updated in place with ``year`` (``%Y``),
        ``month`` (``%m``) and ``day`` (``%d``) strings derived from the
        timestamp interpreted in UTC.

    Raises:
        KeyError: If ``creation_timestamp`` is absent (for a plain dict).
        ValueError, TypeError: If the value cannot be converted by ``int()``.
    """
    # Imported here because the module-level import only provides ``datetime``.
    from datetime import timezone

    epoch_seconds = int(rec["creation_timestamp"])

    # Timezone-aware replacement for the deprecated
    # datetime.utcfromtimestamp(); it yields the same UTC calendar date.
    utc_moment = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)

    rec["year"] = utc_moment.strftime("%Y")
    rec["month"] = utc_moment.strftime("%m")
    rec["day"] = utc_moment.strftime("%d")
    return rec

# Run AddDate through the Map transform on the source frame so the records
# carry the year/month/day fields used as partition keys below.
Mapped_dyF = Map.apply(frame=AmazonS3_node1702009815377, f=AddDate)

# Sink node (Amazon S3): write the mapped frame to s3://xxx/b/ in the
# "glueparquet" format with snappy compression, partitioned by
# year/month/day.
AmazonS3_node1702010667689 = glueContext.write_dynamic_frame.from_options(
    frame=Mapped_dyF,
    connection_type="s3",
    connection_options={
        "path": "s3://xxx/b/",
        "partitionKeys": ["year", "month", "day"],
    },
    format="glueparquet",
    format_options={"compression": "snappy"},
    transformation_ctx="AmazonS3_node1702010667689",
)

# Commit the job once the write call has returned.
job.commit()
	import sys
	from awsglue.transforms import *
	from awsglue.utils import getResolvedOptions
	from pyspark.context import SparkContext
	from awsglue.context import GlueContext
	from awsglue.job import Job
	from datetime import datetime

	# JOB_NAME is the only argument this script resolves from the command line.
	args = getResolvedOptions(sys.argv, ["JOB_NAME"])

	# Build the Spark context, wrap it in a Glue context, and keep a handle on
	# the Spark session exposed by that Glue context.
	sc = SparkContext()
	glueContext = GlueContext(sc)
	spark = glueContext.spark_session

	# Create the Glue job object and initialise it with the resolved arguments.
	job = Job(glueContext)
	job.init(args["JOB_NAME"], args)

	# Source node (Amazon S3): load the JSON input under s3://xxx/a/ through the
	# Glue context's create_dynamic_frame reader.
	AmazonS3_node1702009815377 = glueContext.create_dynamic_frame.from_options(
	    connection_type="s3",
	    connection_options={"paths": ["s3://xxx/a/"], "recurse": True},
	    format="json",
	    format_options={"multiline": False},
	    transformation_ctx="AmazonS3_node1702009815377",
	)

	def AddDate(rec):
	    """Add UTC ``year``/``month``/``day`` string fields to a record.

	    Args:
	        rec: Mapping-like record holding a ``creation_timestamp`` value
	            that ``int()`` can convert. Presumably whole seconds since
	            the Unix epoch -- confirm against the producer of the data.

	    Returns:
	        The same ``rec`` object, updated in place with ``year`` (``%Y``),
	        ``month`` (``%m``) and ``day`` (``%d``) strings derived from the
	        timestamp interpreted in UTC.

	    Raises:
	        KeyError: If ``creation_timestamp`` is absent (for a plain dict).
	        ValueError, TypeError: If the value cannot be converted by
	            ``int()``.
	    """
	    # Imported here because the surrounding imports only provide
	    # ``datetime`` itself.
	    from datetime import timezone

	    epoch_seconds = int(rec["creation_timestamp"])

	    # Timezone-aware replacement for the deprecated
	    # datetime.utcfromtimestamp(); it yields the same UTC calendar date.
	    utc_moment = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)

	    rec["year"] = utc_moment.strftime("%Y")
	    rec["month"] = utc_moment.strftime("%m")
	    rec["day"] = utc_moment.strftime("%d")
	    return rec

	# Run AddDate through the Map transform on the source frame so the records
	# carry the year/month/day fields used as partition keys below.
	Mapped_dyF = Map.apply(frame=AmazonS3_node1702009815377, f=AddDate)

	# Sink node (Amazon S3): write the mapped frame to s3://xxx/b/ in the
	# "glueparquet" format with snappy compression, partitioned by
	# year/month/day.
	AmazonS3_node1702010667689 = glueContext.write_dynamic_frame.from_options(
	    frame=Mapped_dyF,
	    connection_type="s3",
	    connection_options={
	        "path": "s3://xxx/b/",
	        "partitionKeys": ["year", "month", "day"],
	    },
	    format="glueparquet",
	    format_options={"compression": "snappy"},
	    transformation_ctx="AmazonS3_node1702010667689",
	)

	# Commit the job once the write call has returned.
	job.commit()