Skip to content

Instantly share code, notes, and snippets.

@ficolo
Last active December 6, 2017 00:54
Show Gist options
  • Save ficolo/455031f6b8226aee932317464ac4a240 to your computer and use it in GitHub Desktop.
Save ficolo/455031f6b8226aee932317464ac4a240 to your computer and use it in GitHub Desktop.
import org.apache.spark.sql.functions._
import com.databricks.spark.corenlp.functions._
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
/**
 * Minimal driver that runs the spark-corenlp column functions over a one-row
 * DataFrame on a local Spark master and prints the resulting table.
 */
object HelloWorld {

  /**
   * Creates a local `SparkContext`, builds a single-row DataFrame with columns
   * `id` and `text`, applies `cleanxml`, `ssplit`, `tokenize`, `ner` and
   * `sentiment` to it, and prints the result untruncated.
   *
   * The context is always stopped before returning, including when the job fails.
   *
   * @param args command-line arguments; not read
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Application").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new org.apache.spark.sql.SQLContext(sc)
      // Brings `toDF` and the `$"column"` interpolator into scope.
      import sqlContext.implicits._

      val input = Seq(
        (1, "Stanford University is located in California. It is a great university.")
      ).toDF("id", "text")

      val output = input
        .select(cleanxml($"text").as("doc"))
        // `explode` turns the array produced by `ssplit` into one row per element.
        .select(explode(ssplit($"doc")).as("sen"))
        .select(
          $"sen",
          tokenize($"sen").as("words"),
          ner($"sen").as("nerTags"),
          sentiment($"sen").as("sentiment")
        )

      output.show(truncate = false)
    } finally {
      // Release the local executor threads and UI port even if the job throws.
      sc.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment