Skip to content

Instantly share code, notes, and snippets.

Durga Gadiraju dgadiraju

Block or report user

Report or block dgadiraju

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View spark-udf-toDateUDF.scala
/**
 * Converts a date string in M/D/YYYY form (e.g. "7/3/2019") into an
 * Int in YYYYMMDD form (e.g. 20190703), suitable for numeric sorting
 * and comparison.
 *
 * @param d slash-separated date, month first, four-digit year last
 * @return the date as an eight-digit integer (YYYYMMDD)
 * @throws NumberFormatException if month/day are not numeric
 * @throws ArrayIndexOutOfBoundsException if fewer than three "/" parts
 */
def toDate(d: String): Int = {
  val parts = d.split("/")
  // Zero-pad month and day to two digits so "7/3/2019" -> "20190703".
  f"${parts(2)}${parts(0).toInt}%02d${parts(1).toInt}%02d".toInt
}
// Sample invocations: "7/3/2019" -> 20190703, "10/3/2019" -> 20191003.
toDate("7/3/2019")
toDate("10/3/2019")
// Wrap toDate as a Spark SQL UDF so it can be applied to DataFrame columns.
val toDateUDF = org.apache.spark.sql.functions.udf(toDate _)
View spark-core-spark-submit-local.sh
# Submit the DailyProductRevenue application to Spark in local mode.
#   --master local   : run everything in a single local JVM
#   --class          : main class inside the assembled jar
#   --packages       : fetch Typesafe Config 1.3.2 from Maven at submit time
# The trailing "dev" argument is passed to the application; presumably it
# selects the dev.* section of application.properties — confirm in main().
spark-submit \
--master local \
--class retail.DailyProductRevenue \
--packages com.typesafe:config:1.3.2 \
target/scala-2.11/retail_2.11-0.1.jar dev
View spark-core-daily-product-revenue.scala
package retail
import com.typesafe.config.ConfigFactory
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Created by itversity on 05/06/17.
 */
// Spark application entry point for computing daily product revenue.
// NOTE(review): the body of main is truncated in this view — the visible
// imports suggest it loads Typesafe config and builds a SparkContext,
// but confirm against the full file.
object DailyProductRevenue {
def main(args: Array[String]): Unit = {
View DailyProductRevenue01.scala
package retail
import com.typesafe.config.ConfigFactory
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Created by itversity on 05/06/17.
 */
// Spark application entry point (earlier/duplicate revision of
// DailyProductRevenue from gist "DailyProductRevenue01.scala").
// NOTE(review): the body of main is truncated in this view — confirm
// its contents against the full file.
object DailyProductRevenue {
def main(args: Array[String]): Unit = {
View spark-core-create-spark-context.scala
import com.typesafe.config.ConfigFactory
import org.apache.spark.{SparkConf, SparkContext}

// Load application.properties and narrow to the "dev" environment section.
val props = ConfigFactory.load()
val envProps = props.getConfig("dev")

// Build a SparkConf for interactive use; the master URL (e.g. "local")
// comes from the dev.execution.mode property.
val conf = new SparkConf()
  .setAppName("sbt console")
  .setMaster(envProps.getString("execution.mode"))
View spark-core-application.properties
# Environment-keyed settings, read via Typesafe Config with a
# getConfig("dev") / getConfig("prod") prefix lookup.
# dev: Spark local mode against a workstation copy of retail_db.
dev.execution.mode = local
dev.input.base.dir = /Users/itversity/Research/data/retail_db
dev.output.base.dir = /Users/itversity/Research/data
# prod: yarn-client mode against the cluster's HDFS paths.
prod.execution.mode = yarn-client
prod.input.base.dir = /public/retail_db
prod.output.base.dir = /user/training/retail
View spark-core-cartesian.scala
// Demonstrates RDD.cartesian: every pairing of rdd1 x rdd2 (5 x 5 = 25 pairs).
val rdd1 = sc.parallelize(List(1, 2, 3, 4, 5))
val rdd2 = sc.parallelize(List(2, 3, 4, 5, 6))
// Name the cross product once and run both actions against it.
val crossProduct = rdd1.cartesian(rdd2)
crossProduct.take(10).foreach(println)
crossProduct.count
View spark-core-cogroup.scala
// Load orders and key each CSV record by order_id (column 0).
val orders = sc.textFile("/public/retail_db/orders")
val ordersMap = orders.
map(o => (o.split(",")(0).toInt, o))
// Load order_items and key each CSV record by its order_id (column 1).
val orderItems = sc.textFile("/public/retail_db/order_items")
val orderItemsMap = orderItems.
map(oi => (oi.split(",")(1).toInt, oi))
// cogroup pairs each order_id with (Iterable[orders], Iterable[order_items]).
// NOTE(review): this chained expression continues past the end of this view;
// the remainder of the pipeline is not visible here.
ordersMap.
cogroup(orderItemsMap).
View spark-core-subtract-get-customer-details.scala
// Customer ids (column 2) for orders placed in August 2013.
// Split each CSV record once, then filter on the date column (1).
val orderCustomers1 = sc.textFile("/public/retail_db/orders")
  .map(record => record.split(","))
  .filter(cols => cols(1).contains("2013-08"))
  .map(cols => cols(2).toInt)
// Customer ids for orders placed in September 2013.
val orderCustomers2 = sc.textFile("/public/retail_db/orders")
  .map(record => record.split(","))
  .filter(cols => cols(1).contains("2013-09"))
  .map(cols => cols(2).toInt)
// Full customer records keyed by customer_id (column 0), ready to join
// against the month-specific id sets.
val customers = sc.textFile("/public/retail_db/customers")
  .map(record => (record.split(",")(0).toInt, record))
View spark-core-intersection-01-customer-ids.scala
// Customer ids (column 2) of orders whose date column (1) matches the
// given yyyy-MM prefix; the duplicated pipeline is factored into a helper.
def customerIdsForMonth(month: String) =
  sc.textFile("/public/retail_db/orders")
    .filter(record => record.split(",")(1).contains(month))
    .map(record => record.split(",")(2).toInt)

val orderCustomers1 = customerIdsForMonth("2013-08")
val orderCustomers2 = customerIdsForMonth("2013-09")

// Trigger both counts (Spark actions) for comparison.
orderCustomers1.count
orderCustomers2.count
You can’t perform that action at this time.