Last active
December 26, 2022 13:00
-
-
Save anbento0490/49313e47d4e56c7aad35f2baaefb1a8e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: the wildcard import shadows Python builtins such as `round`, `sum` and
# `max`; the code below therefore always goes through the explicit `f.` alias.
from pyspark.sql.functions import *
from pyspark.sql import functions as f
from pyspark.sql import Window

# Load the raw sales CSV, taking column names from the first row.
# `spark` is not defined in this script -- presumably the session object
# provided by the notebook environment (TODO confirm).
dataframe = spark.read.option("header", "true").csv("/FileStore/sales_5000000.csv")

# Build a slim frame holding only the remaining columns, with a parsed order
# date and the revenue expressed in millions, sorted chronologically.
df = (
    dataframe.drop(
        "Country",
        "Sales Channel",
        "Order ID",
        "Ship Date",
        "Units Sold",
        "Unit Price",
        "Unit Cost",
        "Total Cost",
        "Total Profit",
        "Order Priority",
    )
    # 'M/d/yyyy' accepts one- and two-digit days alike; the previous
    # 'M/dd/yyyy' pattern only matches two-digit days under the Spark 3
    # datetime parser. NOTE(review): assumes the file may contain dates such
    # as 5/2/2014 -- confirm against the data.
    .withColumn("Order Date", f.to_date(f.col("Order Date"), "M/d/yyyy"))
    # Cast to double, scale to millions and round to 2 decimals in one step.
    .withColumn(
        "Total Revenue",
        f.round(f.col("Total Revenue").cast("double") / 1000000, 2),
    )
    .withColumnRenamed("Total Revenue", "Total Revenue (£M)")
    .orderBy("Order Date")
)

# Print the first rows without truncating long cell values.
df.show(truncate=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment