Skip to content

Instantly share code, notes, and snippets.

@mooreniemi
Created July 3, 2022 03:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mooreniemi/85b513211c7b32659fdb872c00917d58 to your computer and use it in GitHub Desktop.
Save mooreniemi/85b513211c7b32659fdb872c00917d58 to your computer and use it in GitHub Desktop.
extern crate blas_src;
use hnsw::{Hnsw, Params, Searcher};
use ndarray_npy::{ViewNpyExt, WriteNpyExt};
use memmap2::{Mmap, MmapMut};
use rand_pcg::Pcg64;
use space::Metric;
use std::{fs::OpenOptions, io::BufWriter, time::Instant};
use ndarray::{Array, Array1, ArrayView1, ArrayView2};
use ndarray_rand::{rand_distr::Uniform, RandomExt};
/// Euclidean (L2) metric over `f32` slices for use as an HNSW distance.
///
/// NOTE(review): the compiler output pasted with this gist reports
/// `Euclidean: space::Metric<&[f32]>` as unsatisfied and points at `space` 0.17.0.
/// Presumably the `space` version named in Cargo.toml differs from the one `hnsw`
/// was built against -- confirm in the manifest; nothing in this impl looks wrong.
struct Euclidean;

impl Metric<&[f32]> for Euclidean {
    type Unit = u32;

    /// Returns the L2 distance between `a` and `b` as the raw bit pattern of the
    /// resulting `f32`.
    ///
    /// The trait's `Unit` must be an unsigned integer, so the float is passed
    /// through `to_bits`; for non-negative, non-NaN floats the bit patterns sort in
    /// the same order as the values. If the slices differ in length, only the
    /// common prefix is compared (`zip` stops at the shorter one).
    fn distance(&self, a: &&[f32], b: &&[f32]) -> u32 {
        let squared_sum: f32 = a
            .iter()
            .zip(b.iter())
            .map(|(lhs, rhs)| (lhs - rhs).powi(2))
            .sum();
        squared_sum.sqrt().to_bits()
    }
}
/// Placeholder metric that treats every pair of points as being at distance zero.
struct Embedding;

impl Metric<&[f32]> for Embedding {
    type Unit = u32;

    /// Ignores both inputs and always returns `0`.
    fn distance(&self, _a: &&[f32], _b: &&[f32]) -> u32 {
        0
    }
}
struct Embeddings {
mmap: Mmap,
}
impl Embeddings {
fn view(&self) -> ndarray::ArrayBase<ndarray::ViewRepr<&f32>, ndarray::Dim<[usize; 2]>> {
ArrayView2::<f32>::view_npy(&self.mmap).expect("viewed npy")
}
}
fn main() -> Result<(), &'static str> {
let d = 256;
let n = 10_000;
let x = Array1::random(d, Uniform::<f32>::new(0., 1.));
let file = OpenOptions::new()
.read(true)
.create(true)
.write(true)
.open("/tmp/embeds.npy")
.expect("create embeds.npy");
let mmap = unsafe { Mmap::map(&file).expect("memmap'd embeds.npy") };
let mmap = mmap.make_mut().expect("now mut");
let embeddings = get_embeddings(n, d, mmap);
println!("embeddings.view()");
let start = Instant::now();
let v = embeddings.view();
println!("took: {:?}", start.elapsed());
println!("in npy view row(i)");
for i in 0..n {
let row = v.row(i);
let _res = x.dot(&row);
}
println!("took: {:?}", start.elapsed());
let mut searcher: Searcher<_> = Searcher::default();
let mut index: Hnsw<Euclidean, &[f32], Pcg64, 32, 32> =
Hnsw::new_params(Euclidean, Params::new().ef_construction(200));
for i in 0..n {
let row: ArrayView1<f32> = v.row(i);
let s: &[f32] = row.as_slice().unwrap();
index.insert(s, &mut searcher);
}
Ok(())
}
/// Fills `mmap` with an `.npy`-encoded `n` x `d` array of `f32` values drawn
/// uniformly from `[0, 1)` and returns it wrapped as read-only `Embeddings`.
///
/// The elapsed time of each stage (generate, reshape, write) is printed to stdout.
///
/// # Panics
///
/// Panics if the generated values cannot be reshaped to `(n, d)`, if writing the
/// `.npy` bytes into the mapping fails, or if the mapping cannot be made read-only.
fn get_embeddings(n: usize, d: usize, mut mmap: MmapMut) -> Embeddings {
    println!("gen embeddings");
    let gen_timer = Instant::now();
    // One independently sampled vector per embedding.
    let rows: Vec<_> = (0..n)
        .map(|_| Array1::random(d, Uniform::<f32>::new(0., 1.)))
        .collect();
    println!("took: {:?}", gen_timer.elapsed());

    println!("reshape embeddings");
    let reshape_timer = Instant::now();
    // Concatenate the rows into one flat buffer, then view it as an (n, d) matrix.
    let flat: Vec<f32> = rows.into_iter().flatten().collect();
    let matrix =
        Array::from_shape_vec((n, d), flat).expect("reshape vec of ndarrays into ndarray");
    println!("took: {:?}", reshape_timer.elapsed());

    println!("write embeddings");
    let target: &mut [u8] = &mut mmap[..];
    let write_timer = Instant::now();
    let sink = BufWriter::new(target);
    matrix.write_npy(sink).expect("wrote");
    println!("took: {:?}", write_timer.elapsed());

    // Writing is done; hand the mapping back as read-only.
    Embeddings {
        mmap: mmap.make_read_only().expect("ro"),
    }
}
@mooreniemi
Copy link
Author

82 |         index.insert(s, &mut searcher);
   |               ^^^^^^ method cannot be called on `Hnsw<Euclidean, &[f32], Lcg128Xsl64, 32_usize, 32_usize>` due to unsatisfied trait bounds
   |
   = note: the following trait bounds were not satisfied:
           `Euclidean: space::Metric<&[f32]>`
note: the following trait must be implemented
  --> /home/alex/.cargo/registry/src/github.com-1ecc6299db9ec823/space-0.17.0/src/lib.rs:50:1
   |
50 | / pub trait Metric<P> {
51 | |     type Unit: Unsigned + Ord + Copy;
52 | |
53 | |     fn distance(&self, a: &P, b: &P) -> Self::Unit;
54 | | }
   | |_^

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment