Skip to content

Instantly share code, notes, and snippets.

View josep2's full-sized avatar
🎯
Focusing

Jowanza Joseph josep2

🎯
Focusing
View GitHub Profile
import java.sql._
// Start with a DataFrame and lower the number of coalesce to the number of machines I have.
// NOTE(review): this gist capture is truncated — the closing braces of both foreach
// blocks and the per-row addBatch/executeBatch/close logic are missing below.
// mapPartitions((d) => Iterator(d)) wraps each partition's row iterator in a singleton
// Iterator, so the outer foreach fires once per partition with the whole partition in hand.
dataframe.coalesce(4).mapPartitions ((d) => Iterator (d) ).foreach {
batch => // Per partition, create a JDBC connection
val dbc: Connection = DriverManager.getConnection ("JDBCURL")
val st: PreparedStatement = dbc.prepareStatement ("YOUR PREPARED STATEMENT")
// Within the partition figure out what batch I'd like to write to MySQL.
// NOTE(review): Iterator.grouped expects an Int batch size — the string below is a
// placeholder the reader is meant to replace with a number.
batch.grouped ("# Of Rows you want per batch").foreach {
session => // Add each group of rows to the prepared statement's batch
-- Count fruit_basket rows per type.
-- Fix: "SELECT * ... GROUP BY type" is invalid in standard SQL (every selected
-- column must appear in GROUP BY or inside an aggregate), so project the group
-- key plus an aggregate instead.
select type, count(*) as type_count from fruit_basket group by type;
// Event type carried on the stream: one sale with an id and a monetary amount.
case class Sale(id: String, amount: Double)
val sales: DataStream[Sale] = env.addSource( SOME_STREAMING_SOURCE) // Read in a stream that is typed as a Sale
// NOTE(review): legacy Flink windowing DSL — this is a 50-element count window that
// slides every second, i.e. once per second it emits the sum of `amount` over the
// last 50 sales (not a plain "sum of the last second's sales").
sales.window(Count.of(50)).every(Time.of(1, TimeUnit.SECONDS)).sum("amount")
// Duplicate embed of the windowed-sum snippet: a Sale event type, a typed source,
// and a sliding count-window aggregation.
case class Sale(id: String, amount: Double)
val sales: DataStream[Sale] = env.addSource( SOME_STREAMING_SOURCE) // Read in a stream that is typed as a Sale
// NOTE(review): Count.of(50) with every(Time.of(1, SECONDS)) sums `amount` over the
// last 50 elements, re-emitting every second — the window is count-based, not time-based.
sales.window(Count.of(50)).every(Time.of(1, TimeUnit.SECONDS)).sum("amount")
// Event type carried on the stream: one sale record.
case class Sale(id: String, amount: Double)
val sales: DataStream[Sale] = env.addSource( SOME_STREAMING_SOURCE) // Read in a stream that is typed as a Sale
// Sink: continuously write each incoming Sale as a CSV row at SOME_PATH.
sales.writeAsCsv(SOME_PATH)
// Event type carried on the stream: one sale record.
case class Sale(id: String, amount: Double)
val sales: DataStream[Sale] = env.addSource( SOME_STREAMING_SOURCE) // Read in a stream that is typed as a Sale
// Add one to each sale's amount. Note the mapped stream carries Double values
// (amount + 1), not Sale records.
sales.map{ r => r.amount+1}
// Start your Livy server: https://github.com/cloudera/livy#prerequisites
// Build (or reuse) a local single-core Spark session named "app_1" inside the
// spark-shell and leave it running so functions defined there stay available.
val sparkBuilder = SparkSession.builder.appName("app_1").master("local[1]")
val sparkSession = sparkBuilder.getOrCreate()
// Start your Livy server: https://github.com/cloudera/livy#prerequisites
// Obtain a SparkSession for this shell: a local master with one core,
// application name "app_1"; getOrCreate reuses an existing session if present.
val sparkSession =
  SparkSession.builder
    .appName("app_1")
    .master("local[1]")
    .getOrCreate()
@josep2
josep2 / spark_submit.txt
Created April 17, 2017 16:31
Borrowed from the Spark Docs for illustration: http://spark.apache.org/docs/latest/running-on-mesos.html
# Launch the SparkPi example on a Mesos cluster (borrowed from the Spark docs:
# http://spark.apache.org/docs/latest/running-on-mesos.html).
# --deploy-mode cluster runs the driver inside the cluster, and --supervise asks
# the dispatcher to restart the driver if it exits with a non-zero code.
# Resources: 20G memory per executor, 100 cores total across all executors.
# The trailing 1000 is SparkPi's own argument (number of partitions/slices).
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master mesos://207.184.161.138:7077 \
--deploy-mode cluster \
--supervise \
--executor-memory 20G \
--total-executor-cores 100 \
http://path/to/examples.jar \
1000
val g: GraphFrame = ...
// Breadth-first search from vertices matching "cash > 20000" to vertices matching
// "age < 50", traversing only edges whose relationship is 'follow', at most 3 hops.
// NOTE(review): the original comment said "who are not friends", but the edge filter
// below keeps only 'follow' edges; expressing "not friends" would instead be
// edgeFilter("relationship != 'friend'") — confirm which was intended.
val f = g.bfs.fromExpr("cash > 20000").toExpr("age < 50")
.edgeFilter("relationship = 'follow'")
.maxPathLength(3)
.run()
// Display the resulting paths (one row per path found).
f.show()