Skip to content

Instantly share code, notes, and snippets.

@TomLous TomLous/ManualLabel.scala
Last active Apr 25, 2017

Embed
What would you like to do?
// Tabulator: check out http://stackoverflow.com/questions/7539831/scala-draw-table-to-console
/** Flattens a [[KvKRecord]] into an untyped list of the values its extractor yields.
  *
  * The result of `KvKRecord.unapply` is treated as a `Product` and its elements are
  * returned in iteration order (presumably the case-class fields in declaration
  * order -- confirm against the KvKRecord definition).
  *
  * @param kvKRecord the record to flatten
  * @return the extracted values, or `Nil` when the extractor returns `None`
  */
def propertyList(kvKRecord: KvKRecord): List[Any] =
  KvKRecord.unapply(kvKRecord) match {
    case Some(fields) => fields.productIterator.toList
    case None         => Nil
  }
// Labels gathered during the interactive session (1.0 for 'y', 0.0 for 'n').
val labeledList: ArrayBuffer[LabeledVector] = ArrayBuffer()
// The sampled pairs are walked through a lazy iterator so that each pair is only
// displayed after the previous one has been answered; the first input that is
// neither 'y' nor 'n' (or a failed read) ends the session without prompting further.
comparableDataset
  .sample(withReplacement = false, Config.sampleFactor)
  .collect()
  .iterator
  .map { case (left, right, vector) =>
    // Show both records side by side: one row per property, left value then right value.
    val rows = propertyList(left).zip(propertyList(right)).map { case (l, r) => List(l, r) }
    println(Tabulator.format(rows))
    println("Same? [y/n]")
    // 'y' -> 1.0, 'n' -> 0.0; any other character or a read failure -> None.
    Try(scala.io.StdIn.readChar()).toOption
      .collect {
        case 'y' => 1.0
        case 'n' => 0.0
      }
      .map(label => LabeledVector(left.dossierNummer, right.dossierNummer, vector, label))
  }
  .takeWhile(_.isDefined)
  .foreach(answer => answer.foreach(labeled => labeledList.append(labeled)))
// Turn the collected labels into a DataFrame and append it to the Parquet data at `path`.
val labeledFrame = spark.createDataFrame(labeledList.toList)
labeledFrame.write
  .mode(SaveMode.Append)
  .parquet(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.