Skip to content

Instantly share code, notes, and snippets.

Created August 8, 2017 14:16
Show Gist options
  • Save anonymous/22a856fe1ab10cae1fa91cbdd023e347 to your computer and use it in GitHub Desktop.
Save anonymous/22a856fe1ab10cae1fa91cbdd023e347 to your computer and use it in GitHub Desktop.
/**
 * Param whose value is the [[CountVectorizer]] stage used by this component.
 *
 * A `CountVectorizer` instance is constructed once here and registered as the default value.
 */
val countVectorizer: Param[CountVectorizer] = new Param[CountVectorizer](
  this,
  "countVectorizer",
  "Converts words into numerical ids."
)
setDefault(countVectorizer, new CountVectorizer())
/**
 * Param whose value is the [[IDF]] stage used by this component.
 *
 * An `IDF` instance is constructed once here and registered as the default value.
 */
val idf: Param[IDF] = new Param[IDF](
  this,
  "idf",
  "Calculate weights for vector representation of tokens."
)
setDefault(idf, new IDF)
/**
 * Builds the two vectorization stages, in execution order.
 *
 *  1. A `MultiColumnPipeline` wrapping the configured `countVectorizer` stage, reading the
 *     `questions("tokens")` columns and writing the `questions("tf")` columns.
 *  1. A `MultiColumnPipeline` wrapping the configured `idf` stage, reading the
 *     `questions("tf")` columns and writing the `questions("tfidf")` columns.
 *
 * NOTE(review): `questions` and `MultiColumnPipeline` are defined outside this snippet;
 * presumably `questions(suffix)` yields one column name per question column and
 * `MultiColumnPipeline` applies the wrapped stage to each input/output pair. Confirm
 * against their definitions.
 *
 * @return the term-frequency stage followed by the IDF stage
 */
private def vectorizePipeline(): Array[PipelineStage] = {
  val mcTf = new MultiColumnPipeline()
    .setInputCols(questions("tokens"))
    .setOutputCols(questions("tf"))
    .setStage($(countVectorizer))

  // The IDF stage consumes exactly the columns produced by the term-frequency stage above.
  val mcIdf = new MultiColumnPipeline()
    .setInputCols(questions("tf"))
    .setOutputCols(questions("tfidf"))
    .setStage($(idf))

  Array(mcTf, mcIdf)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment