Skip to content

Instantly share code, notes, and snippets.

@cipepser
Created March 31, 2018 02:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cipepser/a29690d4cf9fe2c6deac6c8cad633092 to your computer and use it in GitHub Desktop.
Save cipepser/a29690d4cf9fe2c6deac6c8cad633092 to your computer and use it in GitHub Desktop.
package main
import (
"encoding/gob"
"fmt"
"log"
"math"
"os"
"sort"
"gonum.org/v1/gonum/mat"
)
// CosineSimilarity pairs a vocabulary word with the cosine similarity
// computed for that word's vector.
type CosineSimilarity struct {
	// word is the vocabulary entry the score belongs to.
	word string
	// value is the cosine similarity score for word.
	value float64
}
// main evaluates the word analogy "Spain - Madrid + Athens". It loads a
// gob-encoded matrix of word vectors and a gob-encoded dictionary mapping each
// word to its row in that matrix, builds the target vector, and prints the ten
// words whose rows have the highest cosine similarity to the target.
//
// Any failure to load the data, or a query word missing from the dictionary,
// terminates the program via log.Fatal.
func main() {
	var proj mat.Dense
	if err := decodeGobFile("../data/q85.proj.txt", &proj); err != nil {
		log.Fatal(err)
	}

	// NOTE(review): "dictt" may be a typo for "dict" — confirm against the
	// file name actually produced by the step that writes the dictionary.
	dict := make(map[string]int)
	if err := decodeGobFile("../data/q85.dictt.txt", &dict); err != nil {
		log.Fatal(err)
	}

	vSpain, err := wordVector(&proj, dict, "Spain")
	if err != nil {
		log.Fatal(err)
	}
	vMadrid, err := wordVector(&proj, dict, "Madrid")
	if err != nil {
		log.Fatal(err)
	}
	vAthens, err := wordVector(&proj, dict, "Athens")
	if err != nil {
		log.Fatal(err)
	}

	// vTarget = Spain - Madrid + Athens.
	vTarget := new(mat.VecDense)
	vTarget.SubVec(vSpain, vMadrid)
	vTarget.AddVec(vTarget, vAthens)

	css := make([]CosineSimilarity, 0, len(dict))
	for w, i := range dict {
		cosine := cos(vTarget, proj.RowView(i))
		// cos yields NaN when a vector has zero norm; such words cannot be
		// ranked and are skipped.
		if math.IsNaN(cosine) {
			continue
		}
		css = append(css, CosineSimilarity{word: w, value: cosine})
	}

	// Highest similarity first. Map iteration order is random and sort.Slice
	// is not stable, so equal scores are ordered by word to keep the output
	// reproducible between runs.
	sort.Slice(css, func(i, j int) bool {
		if css[i].value != css[j].value {
			return css[i].value > css[j].value
		}
		return css[i].word < css[j].word
	})

	const topN = 10
	top := css
	if len(top) > topN {
		top = top[:topN]
	}
	for i, cs := range top {
		fmt.Println(i+1, " ", cs.word, ":", cs.value)
	}
}

// decodeGobFile opens path, gob-decodes its contents into v (which must be a
// pointer) and closes the file before returning.
func decodeGobFile(path string, v interface{}) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	if err := gob.NewDecoder(f).Decode(v); err != nil {
		return fmt.Errorf("decoding %s: %v", path, err)
	}
	return nil
}

// wordVector returns the row of proj that dict assigns to word, as a vector
// backed by the matrix's own row storage. It reports an error when word is not
// in dict or when its index does not address a row of proj, instead of
// silently falling back to row 0.
func wordVector(proj *mat.Dense, dict map[string]int, word string) (*mat.VecDense, error) {
	idx, ok := dict[word]
	if !ok {
		return nil, fmt.Errorf("word %q not found in dictionary", word)
	}
	if rows, _ := proj.Dims(); idx < 0 || idx >= rows {
		return nil, fmt.Errorf("word %q has row index %d, but the matrix has %d rows", word, idx, rows)
	}
	row := proj.RawRowView(idx)
	return mat.NewVecDense(len(row), row), nil
}
// cos returns the cosine similarity of a and b: their dot product divided by
// the product of their Euclidean norms. When either vector is all zeros the
// expression becomes 0/0 and the result is NaN.
func cos(a, b mat.Vector) float64 {
	normA := math.Sqrt(mat.Dot(a, a))
	normB := math.Sqrt(mat.Dot(b, b))
	return mat.Dot(a, b) / (normA * normB)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment