Skip to content

Instantly share code, notes, and snippets.

@mdouze
Created December 6, 2022 15:17
Show Gist options
  • Save mdouze/aef2078afdb12c027ed93672d9801399 to your computer and use it in GitHub Desktop.
Save mdouze/aef2078afdb12c027ed93672d9801399 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "ac99456c",
"metadata": {},
"outputs": [],
"source": [
"import faiss \n",
"import numpy as np\n",
"\n",
"from faiss.contrib.datasets import SyntheticDataset\n",
"from faiss.contrib.inspect_tools import get_invlist"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "20a82636",
"metadata": {},
"outputs": [],
"source": [
"# synthetic data: 32-dim vectors, 1000 for training, 200 in the database, 20 queries\n",
"ds = SyntheticDataset(d=32, nt=1000, nb=200, nq=20)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "02d0ba1d",
"metadata": {},
"outputs": [],
"source": [
"# prepare an index \n",
"\n",
"index = faiss.index_factory(ds.d, \"IVF64,PQ4np\")\n",
"index.train(ds.get_train())\n",
"index.add(ds.get_database())"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "1d582605",
"metadata": {},
"outputs": [],
"source": [
"# reference search\n",
"Dref, Iref = index.search(ds.get_queries(), 10)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "8c81e144",
"metadata": {},
"outputs": [],
"source": [
"# snapshot the content of every inverted list so that it can be restored later\n",
"# each item is the (ids, codes) pair returned by get_invlist, where codes is a\n",
"# matrix of size N_i * code_size and N_i is the number of entries of the list\n",
"stored_invlists = [\n",
"    get_invlist(index.invlists, list_no)\n",
"    for list_no in range(index.nlist)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "e3182acf",
"metadata": {},
"outputs": [],
"source": [
"# now clear out the inverted lists \n",
"index.reset()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "e8e2f542",
"metadata": {},
"outputs": [],
"source": [
"Dnew, Inew = index.search(ds.get_queries(), 10)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "3300810d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],\n",
" [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]])"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Inew[:2] # all -1s: the index is empty"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "f0e74865",
"metadata": {},
"outputs": [],
"source": [
"def add_invlist_entries(invlists, list_no, ids, codes):\n",
"    \"\"\"Append entries to one inverted list (the reverse of get_invlist).\n",
"\n",
"    Args:\n",
"        invlists: inverted lists object providing code_size and add_entries\n",
"            (index.invlists in this notebook).\n",
"        list_no: number of the inverted list that receives the entries.\n",
"        ids: array of shape (ni,) holding the ids of the entries.\n",
"        codes: array of shape (ni, code_size) holding the encoded vectors.\n",
"\n",
"    Raises:\n",
"        AssertionError: if codes does not have invlists.code_size columns or\n",
"            if ids and codes do not describe the same number of entries.\n",
"    \"\"\"\n",
"    # swig_ptr exposes the raw numpy buffers to C++, so normalize the dtype and\n",
"    # memory layout first: this accepts slices / other integer types as input\n",
"    ids = np.ascontiguousarray(ids, dtype=\"int64\")\n",
"    codes = np.ascontiguousarray(codes, dtype=\"uint8\")\n",
"    ni, code_size = codes.shape\n",
"    assert code_size == invlists.code_size\n",
"    ni2, = ids.shape\n",
"    assert ni == ni2\n",
"    if ni == 0:\n",
"        return  # empty list: nothing to hand over to C++\n",
"    invlists.add_entries(\n",
"        list_no, ni,\n",
"        faiss.swig_ptr(ids),\n",
"        faiss.swig_ptr(codes)\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "c642f26c",
"metadata": {},
"outputs": [],
"source": [
"# restore every inverted list from the stored (ids, codes) pairs\n",
"for list_no, (ids, codes) in enumerate(stored_invlists):\n",
"    add_invlist_entries(index.invlists, list_no, ids, codes)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "bcb4e0cb",
"metadata": {},
"outputs": [],
"source": [
"index.ntotal = index.invlists.compute_ntotal()  # optional: keeps ntotal consistent with the restored lists"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "d2c65c79",
"metadata": {},
"outputs": [],
"source": [
"Dnew, Inew = index.search(ds.get_queries(), 10)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "217499aa",
"metadata": {},
"outputs": [],
"source": [
"# the restored index must return exactly the reference results;\n",
"# np.testing reports the mismatching entries and is not stripped by python -O\n",
"np.testing.assert_array_equal(Dnew, Dref)\n",
"np.testing.assert_array_equal(Inew, Iref)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77c613ce",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment