Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

View anand086's full-sized avatar

Anand Prakash anand086

View GitHub Profile
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglueml.transforms import EntityDetector
from pyspark.sql.types import StringType
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql.functions import *
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglueml.transforms import EntityDetector
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql.functions import *
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglueml.transforms import EntityDetector
from pyspark.sql.types import MapType, StringType, StructType, StructField
from awsglue.dynamicframe import DynamicFrame
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglueml.transforms import EntityDetector
args = getResolvedOptions(sys.argv, ["JOB_NAME"])
sc = SparkContext()
[Worker-064f2e13cca81bb69] [2022-09-04 08:12:54,563] INFO [lensesio-msk-transaction-events-src-s3-1|task-0] WorkerSinkTask{id=lensesio-msk-transaction-events-src-s3-1-0} Committing offsets asynchronously using sequence number 11: {transaction_events-0=OffsetAndMetadata{offset=8, leaderEpoch=null, metadata=''}, transaction_events-1=OffsetAndMetadata{offset=3, leaderEpoch=null, metadata=''}, transaction_events-2=OffsetAndMetadata{offset=9, leaderEpoch=null, metadata=''}, transaction_events-3=OffsetAndMetadata{offset=107, leaderEpoch=null, metadata=''}, transaction_events-4=OffsetAndMetadata{offset=8, leaderEpoch=null, metadata=''}} (org.apache.kafka.connect.runtime.WorkerSinkTask:352)
[Worker-064f2e13cca81bb69] [2022-09-04 08:12:54,733] INFO [lensesio-msk-transaction-events-src-s3-1|task-1] WorkerSinkTask{id=lensesio-msk-transaction-events-src-s3-1-1} Committing offsets asynchronously using sequence number 11: {transaction_events-8=OffsetAndMetadata{offset=105, leaderEpoch=null, metadata=''}, transaction_events
connector.class=io.lenses.streamreactor.connect.aws.s3.sink.S3SinkConnector
tasks.max=2
topics=transaction_events
connect.s3.vhost.bucket=true
schema.enable=false
key.converter.schemas.enable=false
connect.s3.kcql=INSERT INTO msk-lensesio-plugin:MSKCluster SELECT * FROM transaction_events STOREAS `JSON` WITH_FLUSH_INTERVAL = 300
aws.region=eu-central-1
aws.custom.endpoint=https://s3.eu-central-1.amazonaws.com/
value.converter.schemas.enable=false
\d+ traffic_violations_p
Partitioned table "admin.traffic_violations_p"
+-------------------------+------------------------+-----------+----------+---------+----------+--------------+-------------+
| Column | Type | Collation | Nullable | Default | Storage | Stats target | Description |
+-------------------------+------------------------+-----------+----------+---------+----------+--------------+-------------+
| seqid | text | | | | extended | | |
| date_of_stop | date | | not null | | plain | | |
| time_of_stop | time without time zone | | | | plain | | |
| agency | text | | | | extended | | |
| subagency | tex
SELECT additionaldata.cardholdername,
card_detail.cardtype,
transaction.currency[1] as currency,
transaction.amount,
from_unixtime(transaction.created_at/1000.0) as timestamp
FROM msk_transaction_events WHERE success=true and transaction.amount > 7000 order by 5 desc
[ec2-user@ip-10-0-1-43 ~]$ kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list $BOOTSTRAP --topic transaction_events | awk -F ":" '{print $3}'
22369
22060
22547
22231
22340