Download the Zeppelin binary distribution from the link above. Unzip it, then launch: bin/zeppelin-daemon.sh start
Then, browse to http://localhost:9009/#/
Here is an example Systemd service file:
$ cat /etc/systemd/system/zeppelin.service
[Unit]
Description = Zeppelin
After = network.target
[Service]
WorkingDirectory=/opt/zeppelin-0.7.3-bin-all
ExecStart=/opt/zeppelin-0.7.3-bin-all/bin/zeppelin.sh
[Install]
WantedBy = multi-user.target
Usage: service zeppelin start|stop|status|restart
The environment variable $SPARK_HOME is set in conf/zeppelin-env.sh
Check the Spark version using println(sc.version). It must match the version of the MongoDB Spark Connector.
- Explicit Data Frame Example
%dep
// Zeppelin uses Maven format (.m2) repositories, make sure to use 'sbt compile publishLocal' for Rocket Engine
// Clear any previously loaded interpreter dependencies before re-declaring them.
z.reset()
// MongoDB Spark Connector — its version must match the running Spark version (see the sc.version check above).
z.load("org.mongodb.spark:mongo-spark-connector_2.11:2.2.0")
// Vegas plotting library for Scala/Spark.
z.load("org.vegas-viz:vegas_2.11:0.3.11")
// Locally published Rocket Engine artifact (from the publishLocal step above).
z.load("com.pnlsys:rocketengine_2.11:0.1")
%spark
import com.mongodb.spark._
import com.mongodb.spark.rdd.MongoRDD
import org.apache.spark.rdd.RDD
import org.bson.Document
import spark.implicits._

// Row type for the explicit DataFrame: one record per (mcycle, minstret) counter pair.
// Field name fixed: was "minstet", inconsistent with the "minstret" key read below.
case class Result(mcycle: Int, minstret: Int)

// Load the raw MongoDB collection as an RDD of BSON documents
// (connection details come from the Spark/Zeppelin configuration, not this paragraph).
val inputRdd: MongoRDD[Document] = MongoSpark.load(sc)

// Keep only documents that carry both counters, then parse them into typed rows.
// NOTE(review): toInt will throw on non-numeric field values — assumes the
// collection stores these counters as decimal strings; confirm against the data.
val rdd: RDD[Result] = inputRdd.filter(f => {
  f.containsKey("mcycle") && f.containsKey("minstret")
}).map(m => {
  val x = m.getString("mcycle")
  val y = m.getString("minstret")
  Result(x.toInt, y.toInt)
})
// Debugging aids — uncomment to sanity-check the RDD before building the DataFrame.
//assert(rdd.count != 0)
//rdd.take(10).foreach(println)

// Explicit schema: the DataFrame columns come from the Result case class fields.
val df = rdd.toDF()
z.show(df)
- Implicit Data Frame Example (schema inferred by the connector)
%spark
import com.mongodb.spark._
import com.mongodb.spark.rdd.MongoRDD
import org.apache.spark.rdd.RDD
import org.bson.Document
import spark.implicits._

// Sanity marker plus the running Spark version (must match the connector build).
println("Hello, World!")
println(sc.version)

// Implicit schema: load the collection and let toDF() infer the columns,
// then print the resulting schema for inspection.
val mongoDocuments = MongoSpark.load(sc)
val inferredFrame = mongoDocuments.toDF()
inferredFrame.printSchema()

println("Goodbye, World!")
See the modulefile for the required environment setup.
$ZEPPELIN_HOME/conf must be writable by whoever runs Zeppelin