Skip to content

Instantly share code, notes, and snippets.

@cartermp
Created August 3, 2023 19:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cartermp/5f94f2ea80aa1ee13b66c87a5ba79efa to your computer and use it in GitHub Desktop.
Save cartermp/5f94f2ea80aa1ee13b66c87a5ba79efa to your computer and use it in GitHub Desktop.
// GetCosineSimilarity returns a similarity score for the vectors a and b.
//
// The score is the plain dot product of the two vectors; no normalization is
// performed here, so it equals the cosine similarity only when both inputs
// are already unit length. idx is used solely to identify the offending pair
// in the error message. An error is returned when the vectors differ in
// length.
func GetCosineSimilarity(a, b []float32, idx int) (float32, error) {
	if len(a) != len(b) {
		msg := fmt.Sprintf("vectors must have the same length: %d != %d, idx: %d", len(a), len(b), idx)
		return 0, errors.New(msg)
	}

	// openai vectors are already normed, so the norms are not divided out.
	var sum float32
	for i, x := range a {
		sum += x * b[i]
	}
	return sum, nil
}
// getTopKMostSimilarColumns scores every entry of columnEmbeddings against
// nlqEmbedding using GetCosineSimilarity and returns the k highest-scoring
// columns, ordered from most to least similar.
//
// Entries whose similarity cannot be computed are logged and left out of the
// result entirely, so they can never be ranked ahead of real columns. If
// fewer than k columns were scored successfully, all of them are returned;
// a negative k yields an empty slice.
func getTopKMostSimilarColumns(nlqEmbedding []float32, columnEmbeddings []ColumnAndEmbedding, k int) []ColumnSimilarity {
	// Start empty and append only successful results: pre-filling the slice
	// would leave zero-valued placeholders behind for every skipped entry.
	similarities := make([]ColumnSimilarity, 0, len(columnEmbeddings))
	for i, ce := range columnEmbeddings {
		similarity, err := GetCosineSimilarity(nlqEmbedding, ce.Embedding, i)
		if err != nil {
			log.Printf("Error calculating similarity: %v", err)
			continue
		}
		similarities = append(similarities, ColumnSimilarity{Column: ce.Column, Similarity: similarity})
	}

	// Highest similarity first.
	sort.Slice(similarities, func(i, j int) bool {
		return similarities[i].Similarity > similarities[j].Similarity
	})

	// Clamp k so that slicing cannot panic on out-of-range values.
	if k < 0 {
		k = 0
	}
	if k > len(similarities) {
		k = len(similarities)
	}
	return similarities[:k]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment