Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Sample Spark 2 application using Scala
# Create the sbt-standard project layout (src/main/scala) and open the
# application source file for editing.
mkdir Spark2StarterApp
cd Spark2StarterApp/
mkdir -p src/main/scala
cd src/main/scala
vim Spark2Example.scala
import org.apache.spark.sql.SparkSession

/** Minimal Spark 2 starter application.
  *
  * Reads a CSV file that has a header row, prints the inferred schema,
  * and shows the `name` column.
  */
object Spark2Example {

  /** Entry point.
    *
    * @param args optional first argument is the CSV input path; defaults to
    *             `/tmp/sample_07.csv` so existing invocations keep working
    */
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession.builder
      .master("local")
      .appName("sample spark2 application")
      .getOrCreate()
    try {
      // headOption generalizes the previously hard-coded path while
      // preserving the old default when no argument is given.
      val csvPath = args.headOption.getOrElse("/tmp/sample_07.csv")
      val df = sparkSession.read.option("header", "true").csv(csvPath)
      df.printSchema()
      df.select("name").show()
    } finally {
      // Stop the SparkSession so the local context is released and the
      // JVM can exit cleanly (the original leaked it).
      sparkSession.stop()
    }
  }
}
# Return to the project root and create the sbt build definition.
cd -
vim build.sbt
name := "Spark2StarterApp"

version := "1.0"

scalaVersion := "2.11.7"

// Single place to bump the Spark release for all Spark modules below.
val sparkVersion = "2.0.2"

// Use HTTPS: the Apache repository serves TLS, and sbt >= 1.3 rejects
// plain-http resolvers by default as insecure.
resolvers ++= Seq(
  "apache-snapshots" at "https://repository.apache.org/snapshots/"
)

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"      % sparkVersion,
  "org.apache.spark" %% "spark-sql"       % sparkVersion,
  "org.apache.spark" %% "spark-mllib"     % sparkVersion,
  "org.apache.spark" %% "spark-streaming" % sparkVersion,
  "org.apache.spark" %% "spark-hive"      % sparkVersion
)
# Package the application jar (written to target/scala-2.11/).
sbt package
# Stage the sample input in HDFS at the path the application reads.
hadoop fs -put sample_07.csv /tmp
# On HDP clusters, select the Spark 2 client instead of Spark 1.
export SPARK_MAJOR_VERSION=2
/usr/hdp/current/spark2-client/bin/spark-submit --class Spark2Example target/scala-2.11/spark2starterapp_2.11-1.0.jar
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment