The Jaccard index of two sets A and B is defined as:
$$J(A, B) = \frac{|A \cap B|}{|A \cup B|}$$
and the Jaccard distance is defined as:
$$d_J(A, B) = 1 - J(A, B) = \frac{|A \cup B| - |A \cap B|}{|A \cup B|}$$
The values obtained lie between 0 and 1 (both included), so I used a float32, which was precise enough for my needs, though you might want to use a float64.
import "strings"
// JACCARD_RECURSIVE selects how jaccard computes the Jaccard distance.
// When true, the distance is derived as 1 - index through a second call to
// jaccard; when false (the default) it is computed directly as
// (|A∪B| - |A∩B|) / |A∪B|.
//
// The name is not idiomatic MixedCaps, but it is kept unchanged so existing
// references keep compiling.
var JACCARD_RECURSIVE = false

// jaccard returns the Jaccard index or the Jaccard distance between the sets
// of distinct runes (Unicode code points) found in a and b.
//
// Parameters:
//   - a, b: the strings to compare; each is treated as a set of runes, so
//     repeated characters count once.
//   - casesens: when true, runes are compared exactly; when false, both
//     strings are upper-cased first so that "a" and "A" are the same element.
//   - distance: when false the Jaccard index |A∩B| / |A∪B| is returned; when
//     true the Jaccard distance 1 - index is returned.
//
// The result is always in [0, 1]. Two empty strings are treated as identical
// sets: index 1, distance 0 (this avoids a 0/0 NaN result).
func jaccard(a string, b string, casesens bool, distance bool) float32 {
	if !casesens {
		// Fold both operands, not just one, so membership tests are symmetric.
		a = strings.ToUpper(a)
		b = strings.ToUpper(b)
	}

	setA := runeSet(a)
	setB := runeSet(b)

	// Count set sizes in runes; byte lengths would over-weight multi-byte
	// characters.
	intersection := 0
	for r := range setA {
		if _, ok := setB[r]; ok {
			intersection++
		}
	}
	union := len(setA) + len(setB) - intersection

	if union == 0 {
		// Both inputs are empty: define them as identical rather than
		// dividing by zero.
		if distance {
			return 0
		}
		return 1
	}

	if !distance {
		return float32(intersection) / float32(union)
	}
	if JACCARD_RECURSIVE {
		// a and b are already case-folded if that was requested, so the
		// nested call compares them exactly.
		return 1 - jaccard(a, b, true, false)
	}
	return float32(union-intersection) / float32(union)
}

// runeSet returns the set of distinct runes occurring in s.
func runeSet(s string) map[rune]struct{} {
	set := make(map[rune]struct{}, len(s))
	for _, r := range s {
		set[r] = struct{}{}
	}
	return set
}