Skip to content

Instantly share code, notes, and snippets.

@TomLous
Last active April 25, 2017 09:12
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TomLous/a5082c4b39b78d0522cd2fca60ff1cc2 to your computer and use it in GitHub Desktop.
Save TomLous/a5082c4b39b78d0522cd2fca60ff1cc2 to your computer and use it in GitHub Desktop.
import java.lang.Math._
import info.debatty.java.stringsimilarity.JaroWinkler
/**
 * A single record whose fields can be compared pairwise against another
 * record to obtain a per-field distance vector.
 *
 * Ten fields are strings and two (`wptf`, `sbi`) are integers; see
 * [[distance]] for how each kind is compared.
 */
case class KvKRecord(
    dossierNummer: String,
    vgNummer: String,
    naamShort: String,
    naamShortP1: String,
    naamShortP2: String,
    naamLong: String,
    adresV: String,
    postcodePlaatsV: String,
    adresC: String,
    postcodePlaatsC: String,
    wptf: Int,
    sbi: Int
) {

  /**
   * All field values in constructor order.
   *
   * The order is significant: [[distance]] pairs the elements of this list
   * positionally with those of the other record.
   */
  def vectorValues: List[Any] =
    List(
      dossierNummer,
      vgNummer,
      naamShort,
      naamShortP1,
      naamShortP2,
      naamLong,
      adresV,
      postcodePlaatsV,
      adresC,
      postcodePlaatsC,
      wptf,
      sbi
    )

  /**
   * Computes a per-field distance between this record and `other`.
   *
   *  - String fields: the value returned by `JaroWinkler.distance`.
   *  - Int fields: `log10(|a - b| + 1) / 5`.
   *  - Anything else: the constant `0.5`. Because both lists have the same
   *    field order and types, this is reached when one of the string values
   *    is `null` (a typed pattern never matches `null`).
   *
   * @param other the record to compare against
   * @return one distance per field, in the same order as [[vectorValues]]
   */
  def distance(other: KvKRecord): List[Double] = {
    val jw = new JaroWinkler
    vectorValues.zip(other.vectorValues).map {
      case (a: String, b: String) =>
        jw.distance(a, b)
      case (a: Int, b: Int) =>
        // Widen to Long before subtracting: in Int arithmetic `a - b` can
        // overflow, abs(Int.MinValue) stays negative, and `abs(..) + 1` can
        // wrap as well -- each of which would hand log10 a negative argument
        // and produce NaN. Adding 1.0 keeps the final sum in floating point.
        log10(abs(a.toLong - b.toLong) + 1.0) / 5
      case _ =>
        0.5
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment