Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
from pyspark.sql import SparkSession
# Count how many times each bakery item was sold and export the ranking as CSV.
#
# NOTE(review): this copy of the script had lost several lines (the end of the
# session builder chain, the `spark.read` receiver, the `load(...)` call and
# the start of a `show(...)` call). They are reconstructed below; items marked
# "assumption" could not be recovered from the file and must be confirmed.

# Reuse an already-running session if there is one, otherwise start a new one.
spark = (
    SparkSession
    .builder
    .appName('spark-demo')
    .getOrCreate()
)

# NOTE(review): assumption — the original input path was missing from the
# file; point this at the real bakery transactions CSV before running.
bakery_csv_path = 'BreadBasket_DMS.csv'

# Read the comma-delimited file, taking column names from the header row and
# letting Spark infer column types.
df_bakery = (
    spark.read
    .format('csv')
    .option('header', 'true')
    .option('delimiter', ',')
    .option('inferSchema', 'true')
    .load(bakery_csv_path)
)

# cube('item') produces one row per item plus a grand-total row; the two
# filters then drop the 'NONE' and 'Adjustment' entries. Result is ordered by
# count (descending), ties broken by item name (ascending).
df_sorted = (
    df_bakery.cube('item').count()
    .filter("item NOT LIKE 'NONE'")
    .filter("item NOT LIKE 'Adjustment'")
    .orderBy(['count', 'item'], ascending=[False, True])
)

# Preview the result without truncating long values.
# NOTE(review): assumption — only the trailing `, False)` of this call
# survived; the row count of 10 should be confirmed.
df_sorted.show(10, truncate=False)

# coalesce(1) collapses the result to a single partition so that the output
# directory contains one CSV part file; any existing output is overwritten.
(
    df_sorted.coalesce(1)
    .write.format('csv')
    .option('header', 'true')
    .save('output/items-sold.csv', mode='overwrite')
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment