Skip to content

Instantly share code, notes, and snippets.

@elena-kolevska
Last active September 12, 2023 10:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save elena-kolevska/35dbb9bf049c2ddd2da3d1a513fa6a3a to your computer and use it in GitHub Desktop.
Save elena-kolevska/35dbb9bf049c2ddd2da3d1a513fa6a3a to your computer and use it in GitHub Desktop.
Get embeddings from Hugging Face, save them in Redis, and run a vector similarity search
// Run this gist with Redis Stack 6.2
// Tested with:
// docker run -p 6381:6379 redis/redis-stack-server:6.2.6-v9-arm64
package main
import (
	"bytes"
	"context"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"math"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"
	"unsafe"

	"github.com/redis/go-redis/v9"
)
// InferenceRequest is the JSON request body that GetEmbeddingsAsBytes posts
// to the inference endpoint.
type InferenceRequest struct {
// Inputs holds the sentences to embed; it is serialized under the "inputs" key.
Inputs []string `json:"inputs"`
}
// Bike is one catalogue entry. LoadBikes marshals it to JSON and stores it in
// Redis; SearchForBikes returns Bikes with Name, Description and Score set.
type Bike struct {
// Name is serialized under the "name" key.
Name string `json:"name"`
// Description is serialized under the "description" key.
Description string `json:"description"`
// Embeddings is the embedding vector of Description, serialized under the
// "description_embeddings" key.
Embeddings []float64 `json:"description_embeddings"`
// Score is filled in from the "score" value of a search hit. It carries no
// JSON tag, so encoding/json writes it under the key "Score" when a Bike is
// marshalled.
Score float64
}
// main runs the demo end to end: it connects to Redis on :6381, wipes the
// selected database, stores the sample bikes together with their embeddings,
// creates the search index and prints the bikes returned for a sample query.
//
// WARNING: FlushDB deletes every key in the selected database.
//
// Each step depends on the previous one, so the first failure is reported and
// the program stops instead of continuing with missing data or a missing index.
func main() {
	ctx := context.Background()
	rdb := redis.NewClient(&redis.Options{
		Addr: ":6381",
	})
	defer rdb.Close()

	// Wipe the selected database before loading the sample data.
	if err := rdb.FlushDB(ctx).Err(); err != nil {
		fmt.Printf("couldn't flush database: %s\n", err)
		return
	}

	// Load bikes
	if err := LoadBikes(ctx, rdb); err != nil {
		// The error text is passed as an argument, never as the format string:
		// it may contain '%' characters.
		fmt.Printf("%s\n", err)
		return
	}

	// Create an index
	if err := CreateIndex(ctx, rdb); err != nil {
		fmt.Printf("couldn't create index: %s\n", err)
		return
	}

	// Search for a bike
	searchTerm := "Best bike for 5 a year old"
	bikes, err := SearchForBikes(ctx, rdb, searchTerm)
	if err != nil {
		fmt.Printf("couldn't search for bikes: %s\n", err)
		return
	}
	for _, bike := range bikes {
		fmt.Printf("bike: %+v\n\n", bike)
	}
}
// SearchForBikes embeds term and runs a KNN vector query against the
// "idx:bikes" index, returning up to three bikes sorted by the "score" value
// reported by the query.
//
// Only Name, Description and Score are populated on the returned bikes. An
// error is returned when the embedding cannot be obtained, when the Redis
// command fails, or when the reply does not have the expected array layout.
func SearchForBikes(ctx context.Context, rdb *redis.Client, term string) ([]*Bike, error) {
	searchTermEmbeddings, err := GetEmbeddingsAsFloats([]string{term})
	if err != nil {
		return nil, fmt.Errorf("couldn't get embeddings: %w", err)
	}

	reply, err := rdb.Do(ctx, "FT.SEARCH", "idx:bikes",
		"*=>[KNN 3 @description_embeddings $my_blob AS score]",
		"RETURN", "3", "score", "name", "description",
		"PARAMS", "2", "my_blob", VectorString64(searchTermEmbeddings),
		"SORTBY", "score",
		"DIALECT", "2").Result()
	if err != nil {
		return nil, fmt.Errorf("couldn't search for bikes: %w", err)
	}

	rows, ok := reply.([]any)
	if !ok {
		return nil, fmt.Errorf("unexpected FT.SEARCH reply type %T", reply)
	}

	// The reply is laid out as [total, key1, fields1, key2, fields2, ...], so
	// the field lists sit at indexes 2, 4, 6, ...
	var result []*Bike
	for i := 2; i < len(rows); i += 2 {
		fields, ok := rows[i].([]any)
		if !ok {
			return nil, fmt.Errorf("unexpected FT.SEARCH field list type %T at index %d", rows[i], i)
		}
		bike, err := bikeFromSearchFields(fields)
		if err != nil {
			return nil, fmt.Errorf("decoding search hit at index %d: %w", i, err)
		}
		result = append(result, bike)
	}
	return result, nil
}

// bikeFromSearchFields builds a Bike from the flat
// [name, value, name, value, ...] field list of a single search hit. Fields
// are matched by name rather than by position; fields other than "score",
// "name" and "description" are ignored, and absent ones keep their zero value.
func bikeFromSearchFields(fields []any) (*Bike, error) {
	if len(fields)%2 != 0 {
		return nil, fmt.Errorf("odd number of field entries: %d", len(fields))
	}

	bike := &Bike{}
	for j := 0; j < len(fields); j += 2 {
		name, ok := fields[j].(string)
		if !ok {
			return nil, fmt.Errorf("unexpected field name type %T", fields[j])
		}
		value, ok := fields[j+1].(string)
		if !ok {
			return nil, fmt.Errorf("unexpected value type %T for field %q", fields[j+1], name)
		}

		switch name {
		case "score":
			score, err := strconv.ParseFloat(value, 64)
			if err != nil {
				return nil, fmt.Errorf("parsing score %q: %w", value, err)
			}
			bike.Score = score
		case "name":
			bike.Name = value
		case "description":
			bike.Description = value
		}
	}
	return bike, nil
}
// CreateIndex issues FT.CREATE for the "idx:bikes" index over JSON documents
// whose keys start with "bike:". The schema exposes $.name and $.description
// as TEXT fields and $.description_embeddings as a FLAT FLOAT64 vector field
// with 768 dimensions and the L2 distance metric.
//
// It returns whatever error the Redis command reports, or nil on success.
func CreateIndex(ctx context.Context, rdb *redis.Client) error {
	command := []interface{}{
		"FT.CREATE", "idx:bikes",
		"ON", "JSON",
		"PREFIX", 1, "bike:",
		"SCHEMA",
		"$.name", "as", "name", "TEXT", "NOSTEM", "SORTABLE",
		"$.description", "as", "description", "TEXT",
		// The 10 after FLAT is the number of attribute arguments that follow.
		"$.description_embeddings", "as", "description_embeddings", "VECTOR", "FLAT", 10,
		"TYPE", "FLOAT64",
		"DIM", 768,
		"DISTANCE_METRIC", "L2",
		"INITIAL_CAP", 111,
		"BLOCK_SIZE", 111,
	}
	return rdb.Do(ctx, command...).Err()
}
// LoadBikes stores the built-in sample bikes in Redis. For each bike it fetches
// the embedding of the description, marshals the bike to JSON and writes it
// with JSON.SET under the key "bike:<index>".
//
// It stops at the first failure and returns that error wrapped with %w, so
// callers can still inspect the underlying cause with errors.Is / errors.As.
func LoadBikes(ctx context.Context, rdb *redis.Client) error {
	bikes := []Bike{
		{
			Name:        "Bicyk",
			Description: "Kids want to ride with as little weight as possible. Especially on an incline! They may be at the age when a 27.5\\\" wheel bike is just too clumsy coming off a 24\\\" bike. The Hillcraft 26 is just the solution they need! Imagine 120mm travel. Boost front/rear. You have NOTHING to tweak because it is easy to assemble right out of the box. The Hillcraft 26 is an efficient trail trekking machine. Up or down does not matter - dominate the trails going both down and up with this amazing bike. The name Monarch comes from Monarch trail in Colorado where we love to ride. It’s a highly technical, steep and rocky trail but the rip on the waydown is so fulfilling. Don’t ride the trail on a hardtail! It is so much more fun on the full suspension Hillcraft! Hit your local trail with the Hillcraft Monarch 26 to get to where you want to go.",
		},
		{
			Name:        "Nord",
			Description: "The Chook Air 5 gives kids aged six years and older a durable and uberlight mountain bike for their first experience on tracks and easy cruising through forests and fields. The lower top tube makes it easy to mount and dismount in any situation, giving your kids greater safety on the trails. The Chook Air 5 is the perfect intro to mountain biking.",
		},
		{
			Name:        "Eva 291",
			Description: "The sister company to Nord, Eva launched in 2005 as the first and only women-dedicated bicycle brand. Designed by women for women, allEva bikes are optimized for the feminine physique using analytics from a body metrics database. If you like 29ers, try the Eva 291. It’s a brand new bike for 2022.. This full-suspension, cross-country ride has been designed for velocity. The 291 has 100mm of front and rear travel, a superlight aluminum frame and fast-rolling 29-inch wheels. Yippee!",
		},
	}

	// Iterate by index so each bike is updated in place instead of copied.
	for i := range bikes {
		bike := &bikes[i]

		embedding, err := GetEmbeddingsAsFloats([]string{bike.Description})
		if err != nil {
			return fmt.Errorf("couldn't get embeddings: %w", err)
		}
		bike.Embeddings = embedding

		bikeJSON, err := json.Marshal(bike)
		if err != nil {
			return fmt.Errorf("couldn't marshall bike struct to JSON: %w", err)
		}

		keyname := fmt.Sprintf("bike:%d", i)
		if err := rdb.Do(ctx, "JSON.SET", keyname, "$", bikeJSON).Err(); err != nil {
			return fmt.Errorf("couldn't save bike json to Redis: %w", err)
		}
	}
	return nil
}
// GetEmbeddingsAsFloats fetches the embedding for sentences through
// GetEmbeddingsAsBytes and decodes the JSON response into a float slice.
//
// The response is expected to be a JSON array holding exactly one embedding
// ("[[...]]"); a flat array ("[...]") is accepted as well. Any other payload,
// including a response carrying more than one embedding, yields an error.
func GetEmbeddingsAsFloats(sentences []string) ([]float64, error) {
	data, err := GetEmbeddingsAsBytes(sentences)
	if err != nil {
		return nil, err
	}

	// Preferred shape: one embedding per input sentence.
	var rows [][]float64
	if err := json.Unmarshal(data, &rows); err == nil {
		if len(rows) != 1 {
			return nil, fmt.Errorf("expected exactly one embedding, got %d", len(rows))
		}
		return rows[0], nil
	}

	// Fallback shape: a single flat vector.
	var flat []float64
	if err := json.Unmarshal(data, &flat); err != nil {
		// Include the start of the payload in the error so an unexpected
		// response is visible without flooding the output.
		const maxPreview = 200
		preview := data
		if len(preview) > maxPreview {
			preview = preview[:maxPreview]
		}
		return nil, fmt.Errorf("decoding embeddings response %q: %w", preview, err)
	}
	return flat, nil
}
// ConvertToFloatArray parses a comma-separated list of numbers into a
// []float64. Whitespace around each element is ignored.
//
// It returns a nil slice and a wrapped strconv error as soon as one element
// cannot be parsed; this includes the empty string, which splits into a single
// empty element.
func ConvertToFloatArray(str string) ([]float64, error) {
	parts := strings.Split(str, ",")
	parsed := make([]float64, 0, len(parts))
	for _, part := range parts {
		number, err := strconv.ParseFloat(strings.TrimSpace(part), 64)
		if err != nil {
			return nil, fmt.Errorf("failed to parse float: %w", err)
		}
		parsed = append(parsed, number)
	}
	return parsed, nil
}
// GetEmbeddingsAsBytes posts sentences to the Hugging Face feature-extraction
// endpoint for the sentence-transformers/all-distilroberta-v1 model and
// returns the raw response body.
//
// The API token is read from the HF_TOKEN environment variable; credentials
// must not be committed to source. An error is returned when the token is
// missing, when the request fails or exceeds the timeout, or when the server
// answers with a status other than 200 OK (the response body is included in
// the error text).
func GetEmbeddingsAsBytes(sentences []string) ([]byte, error) {
	const (
		model          = "sentence-transformers/all-distilroberta-v1"
		tokenEnvVar    = "HF_TOKEN"
		requestTimeout = 30 * time.Second
	)

	token := os.Getenv(tokenEnvVar)
	if token == "" {
		return nil, fmt.Errorf("%s environment variable is not set", tokenEnvVar)
	}

	apiURL := fmt.Sprintf("https://api-inference.huggingface.co/pipeline/feature-extraction/%s", model)

	requestBody, err := json.Marshal(InferenceRequest{Inputs: sentences})
	if err != nil {
		return nil, err
	}

	// Bound the whole exchange so a stalled connection cannot hang the caller.
	ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, bytes.NewReader(requestBody))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+token)

	response, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	data, err := io.ReadAll(response.Body)
	if err != nil {
		return nil, err
	}

	// A non-200 response carries an error payload, not embeddings; surface it
	// here instead of handing it to the caller as if it were valid data.
	if response.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("inference API returned %s: %s", response.Status, data)
	}
	return data, nil
}
// VectorString64 encodes v as a binary string: every element is written as its
// IEEE 754 bit pattern in 8 little-endian bytes, in order. The result can be
// passed as a vector query parameter, as SearchForBikes does:
//
//	rdb.Do(ctx, "FT.SEARCH", "idx", "*=>[KNN 5 @vec $V]",
//		"PARAMS", "2", "V", VectorString64([]float64{1}),
//		"DIALECT", "2")
func VectorString64(v []float64) string {
	const bytesPerFloat = 8
	encoded := make([]byte, 0, len(v)*bytesPerFloat)
	for _, value := range v {
		encoded = binary.LittleEndian.AppendUint64(encoded, math.Float64bits(value))
	}
	return BinaryString(encoded)
}
// BinaryString reinterprets bs as a string without copying the bytes. Redis
// strings are binary safe, so the result can be sent as a command argument
// unchanged.
//
// The returned string shares memory with bs, so bs must not be modified while
// the string is still in use. A nil or empty slice yields the empty string.
func BinaryString(bs []byte) string {
	if len(bs) == 0 {
		return ""
	}
	return unsafe.String(&bs[0], len(bs))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment