Skip to content

Instantly share code, notes, and snippets.

@sachaarbonel
Created November 5, 2022 21:34
Show Gist options
  • Save sachaarbonel/7a261eb52e6e11e0a4c93679b90e99fb to your computer and use it in GitHub Desktop.
Save sachaarbonel/7a261eb52e6e11e0a4c93679b90e99fb to your computer and use it in GitHub Desktop.
fn main() -> anyhow::Result<()> {
let model = SentenceEmbeddingsBuilder::remote(SentenceEmbeddingsModelType::AllMiniLmL12V2).create_model()?;
let json = fs::read_to_string("data/books.json")?;
let library: Library = serde_json::from_str(&json)?;
let mut embeddedbooks = Vec::new();
for book in library.books.clone() {
println!("Embedding book: {}", book.title);
let embeddings = model.encode(&[book.clone().summary])?;
let embedding = to_array(embeddings[0].as_slice());
embeddedbooks.push(book.to_embedded(embedding));
}
let kdtree = kd_tree::KdSlice::sort_by(&mut embeddedbooks, |item1, item2, k| {
item1.embeddings[k]
.partial_cmp(&item2.embeddings[k]).unwrap()
});
+ let query = "rich";
+ println!("Querying: {}", query);
+ let rich_embeddings = model.encode(&[query])?;
+ let rich_embedding = to_array(rich_embeddings[0].as_slice());
+ let rich_topic = EmbeddedBook::topic(rich_embedding);
+ let nearests = kdtree.nearests(&rich_topic, 10);
+ for nearest in nearests {
+ println!("nearest: {:?}", nearest.item.title);
+ println!("distance: {:?}", nearest.squared_distance);
+}
Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment