Skip to content

Instantly share code, notes, and snippets.

@tsusanto
Last active September 26, 2017 12:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tsusanto/5bd904194cc76e513bf1eb880a17f61a to your computer and use it in GitHub Desktop.
Save tsusanto/5bd904194cc76e513bf1eb880a17f61a to your computer and use it in GitHub Desktop.
spark-shell --packages com.databricks:spark-xml_2.10:0.4.1
// Build a SQLContext on top of the shell-provided SparkContext `sc`.
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
// Load the POSLog XML through the spark-xml data source; every <Transaction>
// element becomes one row of `df`. Trailing dots keep the chain valid when the
// lines are pasted one at a time into the REPL.
val df = sqlContext.read.
  format("com.databricks.spark.xml").
  option("rowTag", "Transaction").
  load("/user/tsusanto/POSLog-201409300635-21.xml")
// Line-item view of the transactions.
// explode() emits one row per element of the RetailTransaction.LineItem array
// and exposes that element as the column "LineItem".
val flattened = df.withColumn("LineItem", explode($"RetailTransaction.LineItem"))

// Keep only transaction 73 and project transaction-level fields next to the
// fields of each line item.
// The nested LineItem.SequenceNumber is aliased to "LineItemSequenceNumber":
// left un-aliased it would also be named "SequenceNumber", duplicating the
// transaction-level column and making later references by name ambiguous.
// Line breaks sit inside the open parenthesis so the statement can be pasted
// line by line into spark-shell.
val selectedData = flattened.filter($"SequenceNumber" === "73").select(
  $"RetailStoreID",
  $"WorkstationID",
  $"SequenceNumber",
  $"BusinessDayDate",
  $"OperatorID._OperatorName".as("OperatorName"),
  $"OperatorID._VALUE".as("OperatorID"),
  $"CurrencyCode",
  $"RetailTransaction.ReceiptDateTime",
  $"RetailTransaction.TransactionCount",
  $"LineItem.SequenceNumber".as("LineItemSequenceNumber"),
  $"LineItem.Tax.TaxableAmount",
  $"LineItem.Tax.Amount".as("TaxAmount"),
  $"LineItem.Tax.Percent".as("TaxPercent"),
  $"LineItem.Sale.POSIdentity._POSIDType".as("POSIDType"),
  $"LineItem.Sale.POSIdentity.POSItemID".as("POSItemID"),
  $"LineItem.Sale.Description",
  $"LineItem.Sale.RegularSalesUnitPrice",
  $"LineItem.Sale.ExtendedAmount",
  $"LineItem.Sale.DiscountAmount",
  $"LineItem.Sale.ExtendedDiscountAmount",
  $"LineItem.Sale.Quantity",
  $"RetailTransaction.Total"
)
selectedData.show()
// Header view of the transactions.
// explode() emits one row per element of the RetailTransaction.Total array and
// exposes that element as the column "Header".
val headeronly = df.withColumn(
  "Header",
  explode($"RetailTransaction.Total")
)
// Keep only transaction 73 and project the transaction-level fields together
// with the _TotalType attribute of each exploded Total entry. Trailing dots and
// the open parenthesis keep the statement pasteable line by line in the REPL.
val headers = headeronly.
  filter($"SequenceNumber" === "73").
  select(
    $"RetailStoreID",
    $"WorkstationID",
    $"SequenceNumber",
    $"BusinessDayDate",
    $"OperatorID._OperatorName".as("OperatorName"),
    $"OperatorID._VALUE".as("OperatorID"),
    $"CurrencyCode",
    $"Header._TotalType"
  )
headers.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment