A binary format to store the vectors.
| dim (4 bytes) | vector (4 * dim bytes) |
| dim (4 bytes) | vector (4 * dim bytes) |
...
| dim (4 bytes) | vector (4 * dim bytes) |
from struct import unpack, pack | |
import numpy as np | |
def read_vec(filepath: str, vec_type: np.dtype = np.float32):
    """Read vectors from a file. Support `fvecs`, `ivecs` and `bvecs` format.

    Every record is a 4-byte little-endian integer `dim` followed by `dim`
    values of `vec_type`.

    Reading is best-effort: when a record is truncated or malformed, a
    message is printed and the records read so far are returned. A partially
    read record is never included in the result.

    Args:
        filepath: The path of the file.
        vec_type: The type of the vectors.

    Returns:
        A numpy array built from the complete records, one row per record.
        An empty file yields an empty array of `vec_type`.
    """
    dtype = np.dtype(vec_type)
    size = dtype.itemsize
    vecs = []
    with open(filepath, "rb") as f:
        while True:
            try:
                header = f.read(4)
                if len(header) == 0:
                    # Clean end of file: the previous record was the last one.
                    break
                if len(header) < 4:
                    print(f"truncated dimension header: got {len(header)} of 4 bytes")
                    break
                dim = unpack("<i", header)[0]
                if dim < 0:
                    # A negative size would make f.read() return the whole
                    # remainder of the file as a single bogus vector.
                    print(f"invalid dimension in header: {dim}")
                    break
                payload = f.read(dim * size)
                if len(payload) != dim * size:
                    # Do not keep a short row; it would make the result ragged.
                    print(
                        f"truncated vector: expected {dim * size} bytes, "
                        f"got {len(payload)}"
                    )
                    break
                vecs.append(np.frombuffer(payload, dtype=dtype))
            except Exception as err:
                print(err)
                break
    if not vecs:
        # np.array([]) would default to float64; keep the requested type.
        return np.array([], dtype=dtype)
    return np.array(vecs)
def write_vec(filepath: str, vecs: np.ndarray, vec_type: np.dtype = np.float32):
    """Write vectors to a file. Support `fvecs`, `ivecs` and `bvecs` format.

    Every vector is written as a 4-byte little-endian integer holding its
    length, followed by its values stored as `vec_type`.

    Args:
        filepath: The path of the file. An existing file is overwritten.
        vecs: The vectors to write, one per row.
        vec_type: The element type to store. Each vector is converted to this
            type before writing so the payload size always matches the
            header (e.g. float64 input is stored as float32 for `fvecs`).
    """
    dtype = np.dtype(vec_type)
    with open(filepath, "wb") as f:
        for vec in vecs:
            # Convert first: writing the input's own dtype would put e.g.
            # 8-byte floats behind a header that promises 4-byte elements.
            row = np.asarray(vec, dtype=dtype)
            f.write(pack("<i", len(row)))
            f.write(row.tobytes())
use std::fs::File; | |
use std::io::{BufReader, BufWriter, Read, Write}; | |
use std::path::Path; | |
use num_traits::{FromBytes, ToBytes}; | |
/// Read the fvces/ivces file. | |
pub fn read_vecs<T>(path: &Path) -> std::io::Result<Vec<Vec<T>>> | |
where | |
T: Sized + FromBytes<Bytes = [u8; 4]>, | |
{ | |
let file = File::open(path)?; | |
let mut reader = BufReader::new(file); | |
let mut buf = [0u8; 4]; | |
let mut count: usize; | |
let mut vecs = Vec::new(); | |
loop { | |
count = reader.read(&mut buf)?; | |
if count == 0 { | |
break; | |
} | |
let dim = u32::from_le_bytes(buf) as usize; | |
let mut vec = Vec::with_capacity(dim); | |
for _ in 0..dim { | |
reader.read_exact(&mut buf)?; | |
vec.push(T::from_le_bytes(&buf)); | |
} | |
vecs.push(vec); | |
} | |
Ok(vecs) | |
} | |
/// Write the fvecs/ivecs file. | |
pub fn write_vecs<T>(path: &Path, vecs: &[impl AsRef<[T]>]) -> std::io::Result<()> | |
where | |
T: Sized + ToBytes, | |
{ | |
let file = File::create(path)?; | |
let mut writer = BufWriter::new(file); | |
for vec in vecs.iter() { | |
writer.write_all(&(vec.as_ref().len() as u32).to_le_bytes())?; | |
for v in vec.as_ref().iter() { | |
writer.write_all(T::to_le_bytes(v).as_ref())?; | |
} | |
} | |
writer.flush()?; | |
Ok(()) | |
} |