/**
  * Scores every candidate pair with the trained model and keeps only the
  * pairs the model predicts as label `1.0`.
  *
  * The model output is projected to the columns `id_left`, `id_right`,
  * `features`, `probability` and `label` before it is converted to a typed
  * [[PredictedVector]] dataset.
  *
  * @param trainedModel         fitted logistic regression model used for scoring
  * @param allComparableDataset candidate pairs to score; the columns
  *                             `left.old_id`, `right.old_id` and `features`
  *                             are read from it, so it must expose columns
  *                             with those names
  * @return the scored rows whose predicted label is `1.0`
  */
def applyModelToAllCombinations(
    trainedModel: LogisticRegressionModel,
    allComparableDataset: Dataset[(Person, Person, Vector)]
): Dataset[PredictedVector] = {
  import spark.implicits._

  // Reads the element at index 1 of the model's probability vector
  // (presumably the probability of class 1 for a binary model - confirm
  // against how the labels were encoded at training time).
  val probabilityAtIndexOne = udf((probabilities: Vector) => probabilities(1))

  trainedModel
    .transform(allComparableDataset)
    .select(
      $"left.old_id".as("id_left"),
      $"right.old_id".as("id_right"),
      $"features",
      probabilityAtIndexOne($"probability").as("probability"),
      $"prediction".as("label")
    )
    // `$"label"` instead of the symbol literal `'label`: symbol literals are
    // deprecated since Scala 2.13, and the rest of the block uses `$"..."`.
    .filter($"label" === 1.0)
    .as[PredictedVector]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment