Last active
April 23, 2017 01:31
-
-
Save kylemcdonald/b134042c88575d7ee3740ca1a75b07bd to your computer and use it in GitHub Desktop.
Comparison of nearest-neighbour search build and query times between CT (cover tree), MRPT and FAISS.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 314 µs, sys: 218 ms, total: 219 ms\n", | |
"Wall time: 224 ms\n", | |
"(358359, 128)\n" | |
] | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"\n", | |
"# Load the array stored in data128.npy; %time reports how long the read takes.\n", | |
"%time data128 = np.load('data128.npy')\n", | |
"# Parenthesised print gives the same output on Python 2 and also parses on Python 3.\n", | |
"print(data128.shape)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Fixed-seed permutation of the row ids, so the sampled query rows are the same on\n", | |
"# every run and the global NumPy random state is left untouched.\n", | |
"indices = np.random.RandomState(0).permutation(len(data128))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Failed to load GPU Faiss: No module named swigfaiss_gpu\n", | |
"Faiss falling back to CPU-only.\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 79 µs, sys: 12 µs, total: 91 µs\n", | |
"Wall time: 96.1 µs\n" | |
] | |
} | |
], | |
"source": [ | |
"import faiss\n", | |
"\n", | |
"# Vector width is the size of the second axis of the loaded array.\n", | |
"dimensions = data128.shape[1]\n", | |
"# float32 copy of the data; later cells reuse it by name.\n", | |
"data128f = data128.astype('float32')\n", | |
"# Time only the construction of the IndexFlatL2 object.\n", | |
"%time index = faiss.IndexFlatL2(dimensions)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 74.5 ms, sys: 67.2 ms, total: 142 ms\n", | |
"Wall time: 163 ms\n", | |
"CPU times: user 311 ms, sys: 204 ms, total: 515 ms\n", | |
"Wall time: 552 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"# Reuse the float32 copy made earlier so each timing covers only the index call,\n", | |
"# not a fresh dtype conversion of the whole array.\n", | |
"%time index.train(data128f)\n", | |
"%time index.add(data128f)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 688 ms, sys: 121 ms, total: 809 ms\n", | |
"Wall time: 129 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"# Query with the same shuffled sample the CoverTree and MRPT cells use, and slice it\n", | |
"# before timing so the measurement covers only the search call.\n", | |
"queries = data128f[indices[:100]]\n", | |
"%time D, nns = index.search(queries, 10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 12.1 s, sys: 296 ms, total: 12.4 s\n", | |
"Wall time: 4.51 s\n" | |
] | |
} | |
], | |
"source": [ | |
"from covertree import CoverTree\n", | |
"\n", | |
"# Build a CoverTree from the full data matrix and time the construction.\n", | |
"%time ct = CoverTree.from_matrix(data128)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 14.6 s, sys: 12 ms, total: 14.6 s\n", | |
"Wall time: 3.93 s\n" | |
] | |
} | |
], | |
"source": [ | |
"# Pick the rows named by the first 100 shuffled ids, then time the k=10 lookup.\n", | |
"queries = data128.take(indices[:100], axis=0)\n", | |
"%time nb = ct.kNearestNeighbours(queries, k=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 19.8 s, sys: 48 ms, total: 19.9 s\n", | |
"Wall time: 4.99 s\n" | |
] | |
} | |
], | |
"source": [ | |
"import mrpt\n", | |
"\n", | |
"# NOTE: this rebinds `index`, replacing the FAISS index created in an earlier cell.\n", | |
"index = mrpt.MRPTIndex(\n", | |
"    data128f,\n", | |
"    depth=5,\n", | |
"    n_trees=100,\n", | |
")\n", | |
"# Only build() is timed; constructing the MRPTIndex object above is not.\n", | |
"%time index.build()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 3.25 s, sys: 16 ms, total: 3.26 s\n", | |
"Wall time: 1.09 s\n" | |
] | |
} | |
], | |
"source": [ | |
"def mrpt_anns(index, queries, k=10, votes_required=4):\n", | |
"    \"\"\"Lazily yield the result of ``index.ann`` for each query.\n", | |
"\n", | |
"    Args:\n", | |
"        index: object exposing ``ann(query, k, votes_required=...)``.\n", | |
"        queries: iterable of queries, looked up one at a time.\n", | |
"        k: passed to ``index.ann`` as its second positional argument.\n", | |
"        votes_required: forwarded unchanged to ``index.ann``; the default\n", | |
"            is the value that used to be hard-coded here.\n", | |
"\n", | |
"    Yields:\n", | |
"        Whatever ``index.ann`` returns for each query, in input order.\n", | |
"    \"\"\"\n", | |
"    for query in queries:\n", | |
"        yield index.ann(query, k, votes_required=votes_required)\n", | |
" \n", | |
"# Rows named by the first 100 shuffled ids, drawn from the float32 copy.\n", | |
"queries = data128f.take(indices[:100], axis=0)\n", | |
"# list() drains the generator inside the timed statement, so every lookup is counted.\n", | |
"%time nb = list(mrpt_anns(index, queries))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment