-
-
Save mdouze/7d5271e49a3d4b8c9c8d1eac8f4b9748 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "bc894e95", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import faiss\n", | |
"\n", | |
"from faiss.contrib import datasets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "daaeedd7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# make a 1000-vector dataset in 32D\n", | |
"ds = datasets.SyntheticDataset(32, 0, 1000, 0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "9989cc3f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"index = faiss.index_factory(ds.d, \"HNSW32\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "3e46f777", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"index.add(ds.get_database())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "0534df40", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"hnsw = index.hnsw" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "61c64793", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# get nb levels for each vector, and select one \n", | |
"levels = faiss.vector_to_array(hnsw.levels)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "42f3c1b5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"levels.max()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "9164df13", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(array([592]),)" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.where(levels == 3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "4d69d75e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def vector_to_array(v): \n", | |
" \"\"\" make a vector visible as a numpy array (without copying data)\"\"\"\n", | |
" return faiss.rev_swig_ptr(v.data(), v.size())\n", | |
"\n", | |
"def get_hnsw_links(hnsw, vno): \n", | |
" \"\"\" get link structure for vertex vno \"\"\"\n", | |
" \n", | |
" # make arrays visible from Python\n", | |
" levels = vector_to_array(hnsw.levels)\n", | |
" cum_nneighbor_per_level = vector_to_array(hnsw.cum_nneighbor_per_level)\n", | |
" offsets = vector_to_array(hnsw.offsets)\n", | |
" neighbors = vector_to_array(hnsw.neighbors)\n", | |
" \n", | |
" # all neighbors of vno\n", | |
" neigh_vno = neighbors[offsets[vno] : offsets[vno + 1]]\n", | |
" \n", | |
" # break down per level \n", | |
" nlevel = levels[vno]\n", | |
" return [\n", | |
" neigh_vno[cum_nneighbor_per_level[l] : cum_nneighbor_per_level[l + 1]]\n", | |
" for l in range(nlevel)\n", | |
" ] \n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"id": "e8384c49", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[array([534, 100, 344, 536, 186, 32, 940, 28, 914, 469, 379, 248, 33,\n", | |
" 787, 952, 667, 924, 730, 547, 537, 338, 55, 105, 899, 146, 751,\n", | |
" 189, 512, 236, 506, 57, 858, 578, 199, 279, 649, 294, 347, 407,\n", | |
" 471, 80, 814, 101, 568, 771, 41, 712, 349, 242, 79, 118, 12,\n", | |
" 985, 890, 722, 510, 835, 129, -1, -1, -1, -1, -1, -1],\n", | |
" dtype=int32),\n", | |
" array([473, 763, 344, 511, 52, 569, 877, 994, 998, 935, 133, 982, 702,\n", | |
" 632, 73, 136, 239, 847, 364, 770, 737, 385, 331, 944, 765, -1,\n", | |
" -1, -1, -1, -1, -1, -1], dtype=int32),\n", | |
" array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", | |
" -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],\n", | |
" dtype=int32)]" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# get links for that vector (the helper defined in this notebook is get_hnsw_links)\n", | |
"get_hnsw_links(hnsw, 592)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e6ef5fad", | |
"metadata": {}, | |
"source": [ | |
"There are three levels: the first (base level) has 64 entries, and the levels above have 32 each. The link structure contains ids, which are -1 when there are not enough links to fill the fixed-size array." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b529c4b8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "bc894e95", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import faiss\n", | |
"\n", | |
"from faiss.contrib import datasets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "daaeedd7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# make a 1000-vector dataset in 32D\n", | |
"ds = datasets.SyntheticDataset(32, 0, 1000, 0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "9989cc3f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"index = faiss.index_factory(ds.d, \"HNSW32\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "3e46f777", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"index.add(ds.get_database())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "0534df40", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"hnsw = index.hnsw" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "61c64793", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# get nb levels for each vector, and select one \n", | |
"levels = faiss.vector_to_array(hnsw.levels)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "42f3c1b5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"levels.max()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "9164df13", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(array([592]),)" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.where(levels == 3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "4d69d75e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def vector_to_array(v): \n", | |
" \"\"\" make a vector visible as a numpy array (without copying data)\"\"\"\n", | |
" return faiss.rev_swig_ptr(v.data(), v.size())\n", | |
"\n", | |
"def get_hnsw_links(hnsw, vno): \n", | |
" \"\"\" get link structure for vertex vno \"\"\"\n", | |
" \n", | |
" # make arrays visible from Python\n", | |
" levels = vector_to_array(hnsw.levels)\n", | |
" cum_nneighbor_per_level = vector_to_array(hnsw.cum_nneighbor_per_level)\n", | |
" offsets = vector_to_array(hnsw.offsets)\n", | |
" neighbors = vector_to_array(hnsw.neighbors)\n", | |
" \n", | |
" # all neighbors of vno\n", | |
" neigh_vno = neighbors[offsets[vno] : offsets[vno + 1]]\n", | |
" \n", | |
" # break down per level \n", | |
" nlevel = levels[vno]\n", | |
" return [\n", | |
" neigh_vno[cum_nneighbor_per_level[l] : cum_nneighbor_per_level[l + 1]]\n", | |
" for l in range(nlevel)\n", | |
" ] \n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"id": "e8384c49", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[array([534, 100, 344, 536, 186, 32, 940, 28, 914, 469, 379, 248, 33,\n", | |
" 787, 952, 667, 924, 730, 547, 537, 338, 55, 105, 899, 146, 751,\n", | |
" 189, 512, 236, 506, 57, 858, 578, 199, 279, 649, 294, 347, 407,\n", | |
" 471, 80, 814, 101, 568, 771, 41, 712, 349, 242, 79, 118, 12,\n", | |
" 985, 890, 722, 510, 835, 129, -1, -1, -1, -1, -1, -1],\n", | |
" dtype=int32),\n", | |
" array([473, 763, 344, 511, 52, 569, 877, 994, 998, 935, 133, 982, 702,\n", | |
" 632, 73, 136, 239, 847, 364, 770, 737, 385, 331, 944, 765, -1,\n", | |
" -1, -1, -1, -1, -1, -1], dtype=int32),\n", | |
" array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,\n", | |
" -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],\n", | |
" dtype=int32)]" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# get links for that vector (the helper defined in this notebook is get_hnsw_links)\n", | |
"get_hnsw_links(hnsw, 592)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e6ef5fad", | |
"metadata": {}, | |
"source": [ | |
"There are three levels: the first (base level) has 64 entries, and the levels above have 32 each. The link structure contains ids, which are -1 when there are not enough links to fill the fixed-size array." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b529c4b8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Because the json content is duplicated.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It shows an invalid notebook. The notebook does not appear to be valid JSON.