Skip to content

Instantly share code, notes, and snippets.

@Habitats
Created March 21, 2018 22:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Habitats/8aa7f6d7a2a8b98f4b057e3a2b001754 to your computer and use it in GitHub Desktop.
Save Habitats/8aa7f6d7a2a8b98f4b057e3a2b001754 to your computer and use it in GitHub Desktop.
/**
 * A single named entry of a parsed record.
 *
 * @param field name of the entry
 * @param value payload of the entry; it is untyped (`Any`), so consumers must
 *              inspect its runtime shape before using it
 */
case class Field(
  field: String,
  value: Any
)
/**
 * One parsed record: a list of [[Field]] entries together with record-level
 * attributes.
 *
 * @param fields the named entries belonging to this record
 * @param score  numeric score attached to the record
 * @param siteId site identifier attached to the record
 * @param id     identifier of the record
 */
case class Fields(
  fields: List[Field],
  score: Double,
  siteId: String,
  id: String
)
/**
 * Reads "hello.json" as text through `sc`, parses every line as JSON into a
 * [[Fields]] record, and prints the words found in its "body" entries.
 *
 * Each input line produces one list of words, obtained by splitting every
 * "body" text on single spaces. A "body" value may be a single string or a
 * list; non-string list elements and values of any other shape contribute no
 * words instead of failing the job.
 *
 * The result is collected to the driver and printed on one line prefixed
 * with "Words:".
 */
def hello: Unit = {
  val rdd = sc.textFile("hello.json").map { json =>
    import org.json4s.jackson.JsonMethods._
    // Explicit type: the same singleton type the compiler inferred before.
    implicit val jsonFormats: DefaultFormats.type = DefaultFormats
    val fields = parse(json).extract[Fields]
    for {
      field <- fields.fields if field.field == "body"
      body <- field.value match {
        // Element types are erased at runtime, so match on List[_] and keep
        // only the elements that really are strings.
        case values: List[_] => values.collect { case text: String => text }
        case text: String => List(text)
        // Numbers, nulls, nested objects, ...: nothing to split.
        case _ => Nil
      }
      word <- body.split(" ")
    } yield word
  }
  val collected = rdd.collect()
  // mkString renders the contents; concatenating the Array itself would only
  // print its JVM identity (e.g. "[L...;@1a2b3c").
  println("Words:" + collected.mkString(", "))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment