Skip to content

Instantly share code, notes, and snippets.

@jamescalam
Last active March 10, 2024 06:51
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamescalam/a09a16c17b677f2cf9c019114711f3bf to your computer and use it in GitHub Desktop.
Save jamescalam/a09a16c17b677f2cf9c019114711f3bf to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import shutil\n",
"import urllib.request as request\n",
"from contextlib import closing\n",
"\n",
"# first we download the Sift1M dataset\n",
"# closing() makes sure the response object is closed once the copy is done\n",
"with closing(request.urlopen('ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz')) as r:\n",
"    with open('sift.tar.gz', 'wb') as f:\n",
"        # stream the response into the local file instead of reading it all at once\n",
"        shutil.copyfileobj(r, f)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import tarfile\n",
"\n",
"# the download leaves us with a tar.gz file, we unpack it\n",
"# NOTE: extractall() writes whatever member paths the archive lists,\n",
"# so only run this on an archive you trust\n",
"# the with-block closes the archive handle once extraction is finished\n",
"with tarfile.open('sift.tar.gz', \"r:gz\") as tar:\n",
"    tar.extractall()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# now define a function to read the fvecs file format of Sift1M dataset\n",
"def read_fvecs(fp):\n",
"    \"\"\"Load an .fvecs file as a 2-D float32 array, one vector per row.\n",
"\n",
"    The file is read as int32 words; the first word is taken as the vector\n",
"    dimension d, and every record is d + 1 words (dimension, then d values).\n",
"\n",
"    Args:\n",
"        fp: file path or open binary file, passed straight to np.fromfile.\n",
"\n",
"    Returns:\n",
"        float32 array of shape (n_vectors, d); shape (0, 0) for an empty file.\n",
"\n",
"    Raises:\n",
"        ValueError: if d is negative or the word count is not a multiple of d + 1.\n",
"    \"\"\"\n",
"    a = np.fromfile(fp, dtype='int32')\n",
"    if a.size == 0:\n",
"        # an empty file has no header word, so a[0] would raise IndexError\n",
"        return np.empty((0, 0), dtype='float32')\n",
"    d = int(a[0])\n",
"    if d < 0 or a.size % (d + 1) != 0:\n",
"        raise ValueError(f'malformed fvecs data: dim={d}, words={a.size}')\n",
"    # drop the per-record dimension column, then reinterpret the bits as float32;\n",
"    # copy() detaches the result from the raw int32 buffer\n",
"    return a.reshape(-1, d + 1)[:, 1:].copy().view('float32')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# both files are read from the ./sift/ directory\n",
"# data we will search through\n",
"xb = read_fvecs('./sift/sift_base.fvecs') # 1M samples\n",
"# also get some query vectors to search with\n",
"xq = read_fvecs('./sift/sift_query.fvecs')\n",
"# take just one query (there are many in sift_query.fvecs)\n",
"# reshape keeps it 2-D: one row of xq.shape[1] values\n",
"xq = xq[0].reshape(1, xq.shape[1])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 128)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the single query is kept as a 2-D row, so the shape is (1, dim)\n",
"xq.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1000000, 128)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# shape of the base set we search through: (n_vectors, dim)\n",
"xb.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1., 3., 11., 110., 62., 22., 4., 0., 43., 21., 22.,\n",
" 18., 6., 28., 64., 9., 11., 1., 0., 0., 1., 40.,\n",
" 101., 21., 20., 2., 4., 2., 2., 9., 18., 35., 1.,\n",
" 1., 7., 25., 108., 116., 63., 2., 0., 0., 11., 74.,\n",
" 40., 101., 116., 3., 33., 1., 1., 11., 14., 18., 116.,\n",
" 116., 68., 12., 5., 4., 2., 2., 9., 102., 17., 3.,\n",
" 10., 18., 8., 15., 67., 63., 15., 0., 14., 116., 80.,\n",
" 0., 2., 22., 96., 37., 28., 88., 43., 1., 4., 18.,\n",
" 116., 51., 5., 11., 32., 14., 8., 23., 44., 17., 12.,\n",
" 9., 0., 0., 19., 37., 85., 18., 16., 104., 22., 6.,\n",
" 2., 26., 12., 58., 67., 82., 25., 12., 2., 2., 25.,\n",
" 18., 8., 2., 19., 42., 48., 11.]], dtype=float32)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# display the query vector itself (a single float32 row)\n",
"xq"
]
}
],
"metadata": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
},
"kernelspec": {
"display_name": "Python 3.9.5 64-bit",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment