Skip to content

Instantly share code, notes, and snippets.

@trapped
Last active August 29, 2015 14:04
Show Gist options
  • Save trapped/d1e62dd3b05e00bfd904 to your computer and use it in GitHub Desktop.
Save trapped/d1e62dd3b05e00bfd904 to your computer and use it in GitHub Desktop.
Go implementation of the Jaccard Index/Distance

The Jaccard index of two sets A and B is defined as:

$$J(A, B) = \frac{|A \cap B|}{|A \cup B|}$$

and the Jaccard Distance's formula:

$$d_J(A, B) = 1 - J(A, B) = \frac{|A \cup B| - |A \cap B|}{|A \cup B|}$$

The values obtained are between 0 and 1 (both included):

$$0 \le J(A, B) \le 1$$

therefore I used a float32 (which was precise enough for my needs, though you might want to use a float64).

import "strings"

// JACCARD_RECURSIVE selects which form jaccard uses for the Jaccard distance.
// When true, the distance is derived from the index as 1 - J(A, B); when
// false, it is computed directly as (|A∪B| - |A∩B|) / |A∪B|. The two forms are
// mathematically equivalent and may differ only by float32 rounding.
var JACCARD_RECURSIVE bool

// jaccard returns the Jaccard index, or the Jaccard distance when distance is
// true, of the sets of runes that make up a and b.
//
// Parameters:
//   - a, b: the strings to compare; each is treated as a set of distinct runes.
//   - casesens: when true, runes are compared exactly; when false, both
//     strings are upper-cased first so that letter case is ignored.
//   - distance: when false the index |A∩B| / |A∪B| is returned; when true the
//     distance 1 - index is returned (see JACCARD_RECURSIVE).
//
// The result is always in [0, 1]. When both strings are empty the union is
// empty and the ratio is undefined; two empty sets are treated as identical,
// giving an index of 1 and a distance of 0 instead of NaN.
func jaccard(a string, b string, casesens bool, distance bool) float32 {
	// Fold both inputs, not just one side, so the comparison is symmetric.
	// Folding whole strings also keeps multi-byte runes intact.
	if !casesens {
		a = strings.ToUpper(a)
		b = strings.ToUpper(b)
	}

	inA := make(map[rune]struct{})
	for _, r := range a {
		inA[r] = struct{}{}
	}

	// Walk b once, counting each distinct rune a single time and noting how
	// many of them also occur in a.
	inB := make(map[rune]struct{})
	shared := 0
	for _, r := range b {
		if _, dup := inB[r]; dup {
			continue
		}
		inB[r] = struct{}{}
		if _, ok := inA[r]; ok {
			shared++
		}
	}

	// |A∪B| = |A| + |B| - |A∩B|; sizes are counted in runes, not bytes.
	total := len(inA) + len(inB) - shared

	if total == 0 {
		// Both sets are empty: avoid 0/0.
		if distance {
			return 0
		}
		return 1
	}

	index := float32(shared) / float32(total)
	if !distance {
		return index
	}
	if JACCARD_RECURSIVE {
		return 1 - index
	}
	return float32(total-shared) / float32(total)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment