Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType
# Create (or reuse) the SparkSession that drives this streaming job.
# getOrCreate() is required: without it `spark` is only a Builder and has
# no `readStream` attribute.
spark = (
    SparkSession.builder
    .appName("StructuredStreamKafka")
    .getOrCreate()
)

# Subscribe to the Kafka topic named "topic1".
# NOTE: "kafka.bootstrap.servers" is left empty here; set it to your Kafka
# broker's public DNS address (host:port) before running.
# load() turns the configured reader into a streaming DataFrame.
df_stream = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "")
    .option("subscribe", "topic1")
    .load()
)

# Start running the query that prints the incoming records to the console.
# start() is what actually launches the streaming query.
query = (
    df_stream.writeStream
    .format("console")
    .start()
)

# Keep the driver alive until the streaming query is stopped or fails;
# otherwise a submitted script would exit right after starting the query.
query.awaitTermination()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.