Skip to content

Instantly share code, notes, and snippets.

Created August 6, 2017 13:15
Show Gist options
  • Save anonymous/43bc35926fc4bb2faa8a8431f72edb08 to your computer and use it in GitHub Desktop.
Save anonymous/43bc35926fc4bb2faa8a8431f72edb08 to your computer and use it in GitHub Desktop.
/**
 * Param holding the [[LogisticRegression]] estimator used as the final stage of the
 * logistic-regression pipeline. Defaults to a plain `new LogisticRegression()`.
 */
val logisticRegression: Param[LogisticRegression] = new Param[LogisticRegression](
  this,
  "logisticRegression",
  "Combine question vectors pairs into a predicted probability.")
setDefault(logisticRegression -> new LogisticRegression())
/**
 * Builds the pipeline stages that combine the LDA question vectors into a duplicate
 * probability.
 *
 * Stages, in order:
 *  1. a `VectorAssembler` that merges the columns given by `questions("lda")` into the
 *     single vector column `mergedlda`;
 *  2. a `SQLTransformer` that appends `isDuplicateLabel`, i.e. `isDuplicate` cast to int;
 *  3. the estimator configured in the `logisticRegression` param, reading `mergedlda` as
 *     features and `isDuplicateLabel` as label, writing the probability to `p` and the raw
 *     prediction to `raw`.
 *
 * @return the three stages, ready to be placed in a `Pipeline`
 */
private def logisticRegressionPipeline(): Array[PipelineStage] = {
  val labelCol = "isDuplicateLabel"
  val featuresCol = "mergedlda"

  val assembler = new VectorAssembler()
    .setInputCols(questions("lda"))
    .setOutputCol(featuresCol)

  // Spark SQL accepts an alias without AS: the cast column is exposed as `labelCol`.
  val labeler = new SQLTransformer().setStatement(
    s"SELECT *, cast(isDuplicate as int) $labelCol from __THIS__")

  // $(logisticRegression) hands back the instance stored in the param map (the shared
  // default or a caller-supplied estimator). Copy it first so that setting the column
  // names below does not mutate the caller's object; the copy keeps the same uid and
  // every hyper-parameter already set on it.
  val lr = $(logisticRegression)
    .copy(org.apache.spark.ml.param.ParamMap.empty)
    .setFeaturesCol(featuresCol)
    .setProbabilityCol("p")
    .setRawPredictionCol("raw")
    .setLabelCol(labelCol)

  Array(assembler, labeler, lr)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment