# bdnf/wc-stream.py

## wc-stream.py
from pyspark.sql import SparkSession
from pyspark.sql.functions import *


# Streaming word count: read text lines from a socket, split them into
# words, and print a running count per word to the console.

# SparkSession is defined in pyspark.sql, so it is imported explicitly above
# instead of being expected from the pyspark.sql.functions star import.
spark = (SparkSession
  .builder
  .appName("PythonMnMCount")
  .getOrCreate())

# Streaming source: text received on localhost:9999.
lines = (spark
  .readStream.format("socket")
  .option("host", "localhost")
  .option("port", 9999)
  .load())

# split() yields one array of tokens per input line; explode() turns that
# array into one row per token, so the grouping below counts individual
# words rather than whole token arrays.
# NOTE(review): the delimiter "\\s" matches a single whitespace character, so
# runs of whitespace produce empty-string tokens -- confirm this is intended.
words = lines.select(explode(split(col("value"), "\\s")).alias("word"))
counts = words.groupBy("word").count()

# NOTE(review): "..." is a placeholder; point this at a real checkpoint
# directory before running.
checkpointDir = "..."
streamingQuery = (counts
  .writeStream
  .format("console")
  .outputMode("complete")
  .trigger(processingTime="1 second")
  .option("checkpointLocation", checkpointDir)
  .start())

# Block the driver until the streaming query stops.
streamingQuery.awaitTermination()
	from pyspark.sql import SparkSession
	from pyspark.sql.functions import *


	# Streaming word count: read text lines from a socket, split them into
	# words, and print a running count per word to the console.

	# SparkSession is defined in pyspark.sql, so it is imported explicitly above
	# instead of being expected from the pyspark.sql.functions star import.
	spark = (SparkSession
		.builder
		.appName("PythonMnMCount")
		.getOrCreate())

	# Streaming source: text received on localhost:9999.
	lines = (spark
		.readStream.format("socket")
		.option("host", "localhost")
		.option("port", 9999)
		.load())

	# split() yields one array of tokens per input line; explode() turns that
	# array into one row per token, so the grouping below counts individual
	# words rather than whole token arrays.
	# NOTE(review): the delimiter "\\s" matches a single whitespace character, so
	# runs of whitespace produce empty-string tokens -- confirm this is intended.
	words = lines.select(explode(split(col("value"), "\\s")).alias("word"))
	counts = words.groupBy("word").count()

	# NOTE(review): "..." is a placeholder; point this at a real checkpoint
	# directory before running.
	checkpointDir = "..."
	streamingQuery = (counts
		.writeStream
		.format("console")
		.outputMode("complete")
		.trigger(processingTime="1 second")
		.option("checkpointLocation", checkpointDir)
		.start())

	# Block the driver until the streaming query stops.
	streamingQuery.awaitTermination()