Skip to content

Instantly share code, notes, and snippets.

import pandas as pd

# Values keyed by three-letter month label; insertion order is JAN, FEB, MAR, APR.
d = dict(JAN=10, FEB=15, MAR=12, APR=16)

# Build a Series from the dict: the keys become the index, the numbers the values.
s = pd.Series(d)

# Basic aggregations over the Series values (shown as bare expressions for interactive use).
s.count()
s.sum()
s.min()
s.max()
// Read the orders CSV data into a DataFrame using the csv(...) shortcut.
// The four columns and their types are supplied explicitly as a DDL-style schema string.
// `spark` is assumed to be an already-created SparkSession (e.g. the one spark-shell provides).
val ordersDF = spark.
read.
schema("order_id INT, order_date STRING, order_customer_id INT, order_status STRING").
csv("/Users/itversity/Research/data/retail_db/orders")
// Read the orders CSV data into a DataFrame through the generic format(...)/load(...) reader calls,
// with the same explicit DDL-style schema string for the four columns.
// `spark` is assumed to be an already-created SparkSession (e.g. the one spark-shell provides).
// NOTE(review): this declares `val ordersDF`; that is fine as a stand-alone REPL snippet, but it
// cannot share a compiled scope with another definition of the same name.
val ordersDF = spark.
read.
schema("order_id INT, order_date STRING, order_customer_id INT, order_status STRING").
format("csv").
load("/Users/itversity/Research/data/retail_db/orders")
/**
  * Converts a slash-separated date string into an integer of the form yyyyMMdd.
  *
  * The first field is used as the month, the second as the day and the third as the
  * year; month and day are zero-padded to two digits, e.g. "7/3/2019" becomes 20190703.
  *
  * @param d date text with at least three "/"-separated fields
  * @return the year, padded month and padded day concatenated and parsed as an Int
  */
def toDate(d: String) = {
  val fields = d.split("/")
  // Read the fields in the same order the result is assembled: year, month, day.
  val year = fields(2)
  val month = fields(0).toInt
  val day = fields(1).toInt
  f"$year$month%02d$day%02d".toInt
}
// Sample calls: with toDate defined as above these evaluate to 20190703 and 20191003.
toDate("7/3/2019")
toDate("10/3/2019")
// Wrap toDate as a Spark SQL user-defined function so it can be applied to DataFrame columns.
val toDateUDF = org.apache.spark.sql.functions.udf(toDate _)
# Submit the retail.DailyProductRevenue class from the packaged jar with a local master.
# --packages pulls in the Typesafe Config library (com.typesafe:config:1.3.2);
# the trailing `dev` is handed to the application as its first argument.
spark-submit \
--master local \
--class retail.DailyProductRevenue \
--packages com.typesafe:config:1.3.2 \
target/scala-2.11/retail_2.11-0.1.jar dev
package retail
import com.typesafe.config.ConfigFactory
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by itversity on 05/06/17.
*/
object DailyProductRevenue {
def main(args: Array[String]): Unit = {
package retail
import com.typesafe.config.ConfigFactory
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by itversity on 05/06/17.
*/
object DailyProductRevenue {
def main(args: Array[String]): Unit = {
import com.typesafe.config.ConfigFactory
import org.apache.spark.{SparkConf, SparkContext}
// Load the application configuration via Typesafe Config and select its "dev" section.
val props = ConfigFactory.load()
val envProps = props.getConfig("dev")
// Build the Spark configuration: fixed application name, master URL taken from the
// "execution.mode" key of the selected section.
val conf = new SparkConf().
setAppName("sbt console").
setMaster(envProps.getString("execution.mode"))
# Per-environment settings; every key is prefixed with its environment name (dev or prod).
# For each environment: execution.mode, input.base.dir and output.base.dir.
dev.execution.mode = local
dev.input.base.dir = /Users/itversity/Research/data/retail_db
dev.output.base.dir = /Users/itversity/Research/data
# NOTE(review): `yarn-client` as a master value is deprecated in Spark 2.x in favour of
# master `yarn` with client deploy mode - confirm against the Spark version in use.
prod.execution.mode = yarn-client
prod.input.base.dir = /public/retail_db
prod.output.base.dir = /user/training/retail
// Two small RDDs of five integers each; `sc` is assumed to be an existing SparkContext.
val rdd1 = sc.parallelize(List(1, 2, 3, 4, 5))
val rdd2 = sc.parallelize(List(2, 3, 4, 5, 6))
// cartesian pairs every element of rdd1 with every element of rdd2; print 10 of those pairs.
rdd1.cartesian(rdd2).take(10).foreach(println)
// Total number of pairs in the cartesian product (5 x 5 = 25 for these inputs).
rdd1.cartesian(rdd2).count
// Load the orders text data as an RDD of lines; `sc` is assumed to be an existing SparkContext.
val orders = sc.textFile("/public/retail_db/orders")
// Pair each line with the integer parsed from its first comma-separated field.
val ordersMap = orders.
map(o => (o.split(",")(0).toInt, o))
// Load the order_items text data as an RDD of lines.
val orderItems = sc.textFile("/public/retail_db/order_items")
// Pair each line with the integer parsed from its second comma-separated field
// (presumably the order id the item belongs to - confirm against the data layout).
val orderItemsMap = orderItems.
map(oi => (oi.split(",")(1).toInt, oi))
ordersMap.
cogroup(orderItemsMap).