Skip to content

Instantly share code, notes, and snippets.

@ArtemKoval
Last active June 20, 2018 05:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ArtemKoval/80169887a487bb5ab89f736eec81e92b to your computer and use it in GitHub Desktop.
Elasticsearch + Spark
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark._
object EsSpark {
  /** Minimal Elasticsearch + Spark example: writes two documents to the
   *  "spark/docs" index via elasticsearch-hadoop, reads them back as an RDD,
   *  and prints the document count.
   *
   *  @param args optional overrides so the example is not hard-wired to one
   *              cluster: args(0) = ES host (default "amazonaws.com"),
   *              args(1) = ES port (default "443"). Omitting them preserves
   *              the original behavior.
   */
  def main(args: Array[String]): Unit = {
    val esHost = if (args.length > 0) args(0) else "amazonaws.com"
    val esPort = if (args.length > 1) args(1) else "443"

    val conf = new SparkConf().setAppName("es-spark").setMaster("local[*]")
    conf.set("es.index.auto.create", "true") // create the target index on first write
    conf.set("es.nodes", esHost)
    conf.set("es.port", esPort)
    conf.set("es.nodes.wan.only", "true") // WAN/cloud endpoint: talk only to the declared node, no discovery

    val sc = new SparkContext(conf)
    try {
      // Each Map becomes one Elasticsearch document under "spark/docs".
      val numbers = Map("one" -> 1, "two" -> 2, "three" -> 3)
      val airports = Map("arrival" -> "Otopeni", "SFO" -> "San Fran")
      sc.makeRDD(Seq(numbers, airports)).saveToEs("spark/docs")

      // Read the indexed documents back and report how many there are.
      val rdd = sc.esRDD("spark/docs")
      println(rdd.count())
    } finally {
      // Fix: the original never stopped the context, leaking the local
      // Spark runtime (threads, UI port, temp dirs) on every run.
      sc.stop()
    }
  }
}
<!-- Maven dependencies for the EsSpark example above. -->
<dependencies>
<dependency>
<!-- Spark core compiled for Scala 2.11; the Scala suffix must match the
     Scala version used by elasticsearch-hadoop below. -->
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<!-- elasticsearch-hadoop provides the saveToEs/esRDD Spark integration
     (import org.elasticsearch.spark._). Version 5.1.2 targets ES 5.x;
     NOTE(review): confirm it matches the cluster's Elasticsearch version. -->
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-hadoop</artifactId>
<version>5.1.2</version>
<exclusions>
<exclusion>
<!-- Exclude the log4j->slf4j bridge to avoid a circular logging setup
     with Spark's own slf4j/log4j bindings. -->
<groupId>org.slf4j</groupId>
<artifactId>log4j-over-slf4j</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment