Skip to content

Instantly share code, notes, and snippets.

@mdouze
Created August 16, 2021 08:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mdouze/42c10ff901970d94ddb1cc182e392f2a to your computer and use it in GitHub Desktop.
Save mdouze/42c10ff901970d94ddb1cc182e392f2a to your computer and use it in GitHub Desktop.
import numpy as np
import faiss

from faiss.contrib import datasets
# make a 1000-vector dataset in 32D
ds = datasets.SyntheticDataset(32, 0, 1000, 0)
index = faiss.index_factory(ds.d, "HNSW32")
index.add(ds.get_database())
hnsw = index.hnsw
# get nb levels for each vector, and select one 
levels = faiss.vector_to_array(hnsw.levels)
levels.max()
3
np.where(levels == 3)
(array([592]),)
def vector_to_array(v): 
    """ make a vector visible as a numpy array (without copying data)"""
    return faiss.rev_swig_ptr(v.data(), v.size())

def get_hnsw_links(hnsw, vno): 
    """ get link strcutre for vertex vno """
    
    # make arrays visible from Python
    levels = vector_to_array(hnsw.levels)
    cum_nneighbor_per_level = vector_to_array(hnsw.cum_nneighbor_per_level)
    offsets = vector_to_array(hnsw.offsets)
    neighbors = vector_to_array(hnsw.neighbors)
    
    # all neighbors of vno
    neigh_vno = neighbors[offsets[vno] : offsets[vno + 1]]
    
    # break down per level 
    nlevel = levels[vno]
    return [
        neigh_vno[cum_nneighbor_per_level[l] : cum_nneighbor_per_level[l + 1]]
        for l in range(nlevel)
    ]                 
    
# get links for that vector
get_links(hnsw, 592)
[array([534, 100, 344, 536, 186,  32, 940,  28, 914, 469, 379, 248,  33,
        787, 952, 667, 924, 730, 547, 537, 338,  55, 105, 899, 146, 751,
        189, 512, 236, 506,  57, 858, 578, 199, 279, 649, 294, 347, 407,
        471,  80, 814, 101, 568, 771,  41, 712, 349, 242,  79, 118,  12,
        985, 890, 722, 510, 835, 129,  -1,  -1,  -1,  -1,  -1,  -1],
       dtype=int32),
 array([473, 763, 344, 511,  52, 569, 877, 994, 998, 935, 133, 982, 702,
        632,  73, 136, 239, 847, 364, 770, 737, 385, 331, 944, 765,  -1,
         -1,  -1,  -1,  -1,  -1,  -1], dtype=int32),
 array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
       dtype=int32)]

There are three levels, the first (base level) has 64 entries. The levels above have 32. The link structure contains ids, that can be -1 when there are not enough links to fill the fixed-size array.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment