Created
February 14, 2020 18:39
-
-
Save CliffordAnderson/d4a6a4dc310a5ebae2d24174335858d9 to your computer and use it in GitHub Desktop.
Spark Notes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Spark-shell notes: load a CSV, give its columns meaningful names, and run a
// pretrained Spark NLP pipeline over the "text" column.
// Expects `spark` (the SparkSession provided by spark-shell) to be in scope and
// the Spark NLP library to be on the classpath.

// Read the CSV with default reader options, so columns arrive named _c0, _c1, ...
// NOTE(review): no "header" option is set; if the file has a header row it will
// be read as data. Confirm against the actual file.
val pbp = spark.read.format("csv").load("Desktop/pbp.csv")
pbp.show()
pbp.printSchema()

// Target names for the positional columns _c0 through _c7, in order.
val columnNames = Seq(
  "article", "journal", "volume", "issue", "date", "pages", "url", "text"
)

// Rename each _cN column to its descriptive name, one withColumnRenamed call per
// column, applied in order.
val bp = columnNames.zipWithIndex.foldLeft(pbp) { case (df, (name, idx)) =>
  df.withColumnRenamed(s"_c$idx", name)
}
bp.printSchema()

import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline

// Load the pretrained "explain_document_ml" pipeline and apply it to the renamed
// DataFrame.
// NOTE(review): presumably the pipeline reads its input from the "text" column;
// verify against the Spark NLP documentation for this pipeline.
val explainDocumentPipeline = PretrainedPipeline("explain_document_ml")
val bp_annotated = explainDocumentPipeline.transform(bp)
bp_annotated.select("token").show()

// Projection containing only the raw text column.
val bp_select = bp.select("text")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment