Skip to content

Instantly share code, notes, and snippets.

@jeechu
Last active September 4, 2019 12:38
Show Gist options
  • Save jeechu/f1956d87cfd364c4afe14009944e356a to your computer and use it in GitHub Desktop.
Save jeechu/f1956d87cfd364c4afe14009944e356a to your computer and use it in GitHub Desktop.
Feature vectorization using the hashing trick in Go
// hashify maps a list of string features onto a fixed-length count vector
// using the hashing trick.
//
// Each entry of appList is hashed with SHA-256. The digest, read as an
// unsigned big-endian integer, is reduced modulo vectorLength to pick a
// bucket, and that bucket's counter is incremented. Collisions are not
// resolved: distinct strings that land in the same bucket share a counter.
//
// The returned slice has vectorLength elements and its elements sum to
// len(appList). A non-positive vectorLength has no buckets to count into, so
// an empty, non-nil slice is returned instead of panicking.
func hashify(appList []string, vectorLength int64) []int {
	if vectorLength <= 0 {
		return []int{}
	}

	hashedList := make([]int, vectorLength)

	// These values do not depend on the entry being hashed, so they are
	// allocated once and reused across iterations.
	modulus := big.NewInt(vectorLength)
	digest := new(big.Int)
	bucket := new(big.Int)

	for _, app := range appList {
		sum := sha256.Sum256([]byte(app))
		// SetBytes interprets the raw digest as a big-endian unsigned
		// integer, which is the same value its hexadecimal text denotes.
		digest.SetBytes(sum[:])
		// modulus is positive, so the result lies in [0, vectorLength) and
		// is always a valid index into hashedList.
		bucket.Mod(digest, modulus)
		hashedList[bucket.Int64()]++
	}
	return hashedList
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment