Last active
July 16, 2016 03:32
-
-
Save wanji/c08693f06ef744feef50 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:28c83ae59bc46da0f79efcd9c8f7e58066b9dce4f9e61612bf1e6b8eb84a7efe" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Examples for `HDIdx`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# import necessary packages\n", | |
"\n", | |
"import hdidx\n", | |
"import numpy as np\n", | |
"\n", | |
"# print log messages\n", | |
"import logging\n", | |
"reload(logging)\n", | |
"logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s',\n", | |
" level=logging.INFO)\n", | |
"\n", | |
"# generating sample data\n", | |
"ndim = 256 # dimension of features\n", | |
"ndb = 10000 # number of dababase items\n", | |
"nqry = 120 # number of queries\n", | |
"\n", | |
"X_db = np.random.random((ndb, ndim)).astype(np.float64)\n", | |
"X_qry = np.random.random((nqry, ndim)).astype(np.float32)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Basic Usage" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# create Product Quantization Indexer\n", | |
"idx = hdidx.indexer.PQIndexer()\n", | |
"# set storage for the indexer, the default is store indexex in memory\n", | |
"idx.set_storage()\n", | |
"# build indexer\n", | |
"idx.build({'vals': X_db, 'nsubq': 8})\n", | |
"# add database items to the indexer\n", | |
"idx.add(X_db)\n", | |
"# searching in the database, and return top-100 items for each query\n", | |
"idx.search(X_qry, 100)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:18,613 - INFO - Building codebooks in subspaces - BEGIN\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:18,614 - INFO - \tsubspace 0/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:18,877 - INFO - \tsubspace 1/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:19,266 - INFO - \tsubspace 2/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:19,527 - INFO - \tsubspace 3/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:19,821 - INFO - \tsubspace 4/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:20,192 - INFO - \tsubspace 5/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:20,654 - INFO - \tsubspace 6/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:21,007 - INFO - \tsubspace 7/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:21,371 - INFO - Building codebooks in subspaces - DONE\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:21,372 - INFO - 0/10000: 10000\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:25,919 - INFO - Start Querying ...\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:25,973 - INFO - \t100/120: 0.0005s per query\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:25,973 - INFO - \t\tknn: 0.0001;\tdistance: 0.0001;\tresult: 0.0000;\tdistab: 0.0003\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:25,983 - INFO - Querying Finished!\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:25,984 - INFO - Average querying time: 0.0005\n" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 2, | |
"text": [ | |
"(array([[4875, 1010, 2924, ..., 1778, 9669, 6937],\n", | |
" [ 935, 7988, 6960, ..., 6155, 2862, 8552],\n", | |
" [4070, 6554, 2425, ..., 5717, 8434, 7749],\n", | |
" ..., \n", | |
" [6695, 6395, 8149, ..., 2395, 3857, 7193],\n", | |
" [5368, 77, 1238, ..., 1319, 8494, 2513],\n", | |
" [3920, 9361, 9265, ..., 1398, 2621, 4062]], dtype=int32),\n", | |
" array([[ 22.39608955, 22.64003181, 23.18744469, ..., 24.55779076,\n", | |
" 24.56216049, 24.56312752],\n", | |
" [ 21.87729645, 22.44916153, 22.9182663 , ..., 24.82294655,\n", | |
" 24.83014107, 24.83216476],\n", | |
" [ 24.05742455, 24.07415771, 24.18813896, ..., 26.00284767,\n", | |
" 26.00819397, 26.01751709],\n", | |
" ..., \n", | |
" [ 23.31334496, 23.70270348, 24.25839233, ..., 26.0488205 ,\n", | |
" 26.04973793, 26.05724144],\n", | |
" [ 24.52768135, 24.54415512, 24.70788956, ..., 26.36787415,\n", | |
" 26.36966705, 26.37992859],\n", | |
" [ 22.33849525, 22.79646301, 22.85318565, ..., 24.45709991,\n", | |
" 24.45932007, 24.46750259]], dtype=float32))" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Save indexer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# create Product Quantization Indexer\n", | |
"idx = hdidx.indexer.PQIndexer()\n", | |
"# set storage for the indexer, this time we store the indexes into LMDB\n", | |
"idx.set_storage('lmdb', {'path': '/tmp/pq.idx'})\n", | |
"# build indexer\n", | |
"idx.build({'vals': X_db, 'nsubq': 8})\n", | |
"# save the index information for future use\n", | |
"idx.save('/tmp/pq.info')\n", | |
"# add database items to the indexer\n", | |
"idx.add(X_db)\n", | |
"# searching in the database, and return top-100 items for each query\n", | |
"idx.search(X_qry, 100)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:26,219 - INFO - Building codebooks in subspaces - BEGIN\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:26,219 - INFO - \tsubspace 0/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:26,635 - INFO - \tsubspace 1/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:27,027 - INFO - \tsubspace 2/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:27,354 - INFO - \tsubspace 3/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:27,619 - INFO - \tsubspace 4/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:27,953 - INFO - \tsubspace 5/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:28,295 - INFO - \tsubspace 6/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:28,721 - INFO - \tsubspace 7/8:\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:29,056 - INFO - Building codebooks in subspaces - DONE\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:29,057 - INFO - 0/10000: 10000\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:31,449 - INFO - Start Querying ...\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:31,568 - INFO - \t100/120: 0.0012s per query\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:31,569 - INFO - \t\tknn: 0.0004;\tdistance: 0.0005;\tresult: 0.0000;\tdistab: 0.0003\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:31,594 - INFO - Querying Finished!\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:31,594 - INFO - Average querying time: 0.0012\n" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"(array([[33870, 6606, 25826, ..., 2502, 12495, 26082],\n", | |
" [42343, 49349, 13409, ..., 44196, 2002, 18067],\n", | |
" [47133, 20148, 37617, ..., 8761, 33348, 36543],\n", | |
" ..., \n", | |
" [ 2807, 10529, 37991, ..., 8355, 47615, 30899],\n", | |
" [13999, 36660, 3608, ..., 44946, 15916, 20692],\n", | |
" [36386, 13856, 46034, ..., 41069, 45679, 4505]], dtype=int32),\n", | |
" array([[ 22.21693611, 22.44924736, 22.58147049, ..., 23.68716049,\n", | |
" 23.69098854, 23.70442963],\n", | |
" [ 22.1412468 , 22.20767593, 22.21963882, ..., 24.13736343,\n", | |
" 24.13786697, 24.13886642],\n", | |
" [ 23.51828194, 23.58364487, 23.73697472, ..., 25.25225258,\n", | |
" 25.25513077, 25.25598907],\n", | |
" ..., \n", | |
" [ 23.4031868 , 23.53798866, 23.63532448, ..., 25.11426353,\n", | |
" 25.13235092, 25.13462448],\n", | |
" [ 23.5990181 , 23.77312851, 23.82258415, ..., 25.51331329,\n", | |
" 25.51656532, 25.51693535],\n", | |
" [ 21.98418999, 22.25404167, 22.51370621, ..., 23.68246841,\n", | |
" 23.68491936, 23.6861496 ]], dtype=float32))" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Load existing indexer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# create Product Quantization Indexer\n", | |
"idx = hdidx.indexer.PQIndexer()\n", | |
"# set storage for the indexer, this time we load the indexes from LMDB\n", | |
"idx.set_storage('lmdb', {'path': '/tmp/pq.idx'})\n", | |
"# save the index information for future use\n", | |
"idx.load('/tmp/pq.info')\n", | |
"# searching in the database, and return top-100 items for each query\n", | |
"idx.search(X_qry, 100)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:31,882 - INFO - Start Querying ...\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:32,000 - INFO - \t100/120: 0.0012s per query\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:32,001 - INFO - \t\tknn: 0.0004;\tdistance: 0.0005;\tresult: 0.0000;\tdistab: 0.0003\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:32,024 - INFO - Querying Finished!\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"2015-04-08 16:51:32,025 - INFO - Average querying time: 0.0012\n" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": [ | |
"(array([[33870, 6606, 25826, ..., 2502, 12495, 26082],\n", | |
" [42343, 49349, 13409, ..., 44196, 2002, 18067],\n", | |
" [47133, 20148, 37617, ..., 8761, 33348, 36543],\n", | |
" ..., \n", | |
" [ 2807, 10529, 37991, ..., 8355, 47615, 30899],\n", | |
" [13999, 36660, 3608, ..., 44946, 15916, 20692],\n", | |
" [36386, 13856, 46034, ..., 41069, 45679, 4505]], dtype=int32),\n", | |
" array([[ 22.21693611, 22.44924736, 22.58147049, ..., 23.68716049,\n", | |
" 23.69098854, 23.70442963],\n", | |
" [ 22.1412468 , 22.20767593, 22.21963882, ..., 24.13736343,\n", | |
" 24.13786697, 24.13886642],\n", | |
" [ 23.51828194, 23.58364487, 23.73697472, ..., 25.25225258,\n", | |
" 25.25513077, 25.25598907],\n", | |
" ..., \n", | |
" [ 23.4031868 , 23.53798866, 23.63532448, ..., 25.11426353,\n", | |
" 25.13235092, 25.13462448],\n", | |
" [ 23.5990181 , 23.77312851, 23.82258415, ..., 25.51331329,\n", | |
" 25.51656532, 25.51693535],\n", | |
" [ 21.98418999, 22.25404167, 22.51370621, ..., 23.68246841,\n", | |
" 23.68491936, 23.6861496 ]], dtype=float32))" | |
] | |
} | |
], | |
"prompt_number": 4 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment