Skip to content

Instantly share code, notes, and snippets.

@stuppie
Created August 22, 2016 19:05
Show Gist options
  • Save stuppie/4d45459df3be477c5e46535aeb5c9b7e to your computer and use it in GitHub Desktop.
Save stuppie/4d45459df3be477c5e46535aeb5c9b7e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import random\n",
"from random import randint\n",
"\n",
"import numpy as np\n",
"from scipy.misc import comb\n",
"from scipy.sparse import coo_matrix, find\n",
"from scipy.sparse.data import _data_matrix\n",
"from sklearn.metrics import cluster"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Seed the stdlib generator that backs `randint` so that re-running the\n",
"# notebook benchmarks the same synthetic labelling every time.\n",
"random.seed(0)\n",
"\n",
"# One million samples, each given an integer label in [0, 5000] (inclusive).\n",
"labels_true = [randint(0, 5000) for _ in range(1000000)]\n",
"labels_pred = [randint(0, 5000) for _ in range(1000000)]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Map every label to its index among the sorted unique labels, so that the\n",
"# indices can be used directly as row / column coordinates.\n",
"classes, class_idx = np.unique(labels_true, return_inverse=True)\n",
"clusters, cluster_idx = np.unique(labels_pred, return_inverse=True)\n",
"n_classes = classes.shape[0]\n",
"n_clusters = clusters.shape[0]\n",
"\n",
"# One unit entry per sample at (class, cluster). COO keeps repeated coordinates\n",
"# as separate entries; they are summed into counts by the conversions below.\n",
"# `np.int` was only an alias of the builtin `int` and no longer exists in\n",
"# NumPy >= 1.24, so the builtin is passed directly (same resulting dtype).\n",
"c = coo_matrix((np.ones(class_idx.shape[0]), (class_idx, cluster_idx)),\n",
"               shape=(n_classes, n_clusters),\n",
"               dtype=int)\n",
"c_array = c.toarray()  # dense ndarray\n",
"csr = c.tocsr()        # compressed sparse row\n",
"csc = c.tocsc()        # compressed sparse column"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 loops, best of 3: 11.6 ms per loop\n",
"100 loops, best of 3: 4.06 ms per loop\n",
"10 loops, best of 3: 107 ms per loop\n",
"10 loops, best of 3: 93.9 ms per loop\n"
]
}
],
"source": [
"%timeit c.sum(axis=0)\n",
"%timeit c.sum(axis=1)\n",
"%timeit find(c)\n",
"%timeit c.tocsc().data.flatten()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 34.8 ms per loop\n",
"10 loops, best of 3: 29.2 ms per loop\n",
"10 loops, best of 3: 51.3 ms per loop\n"
]
}
],
"source": [
"%timeit c_array.sum(axis=0)\n",
"%timeit c_array.sum(axis=1)\n",
"%timeit c_array.flatten()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 loops, best of 3: 2.52 ms per loop\n",
"1000 loops, best of 3: 1.42 ms per loop\n",
"10 loops, best of 3: 38.4 ms per loop\n",
"1000 loops, best of 3: 1.03 ms per loop\n"
]
}
],
"source": [
"%timeit csr.sum(axis=0)\n",
"%timeit csr.sum(axis=1)\n",
"%timeit find(csr)\n",
"%timeit csr.data.flatten()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000 loops, best of 3: 1.42 ms per loop\n",
"100 loops, best of 3: 2.41 ms per loop\n",
"10 loops, best of 3: 56.4 ms per loop\n",
"1000 loops, best of 3: 1.02 ms per loop\n"
]
}
],
"source": [
"%timeit csc.sum(axis=0)\n",
"%timeit csc.sum(axis=1)\n",
"%timeit find(csc)\n",
"%timeit csc.data.flatten()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 loop, best of 3: 1.41 s per loop\n",
"1 loop, best of 3: 3.38 s per loop\n"
]
}
],
"source": [
"# Time the combined metric on the raw label lists, without and with `sparse=True`.\n",
"# NOTE(review): `sparse=` is not accepted by released scikit-learn versions of\n",
"# this function -- presumably this benchmarks a development branch; confirm.\n",
"%timeit cluster.homogeneity_completeness_v_measure(labels_true, labels_pred)\n",
"%timeit cluster.homogeneity_completeness_v_measure(labels_true, labels_pred, sparse=True)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 loop, best of 3: 415 ms per loop\n",
"1 loop, best of 3: 2.46 s per loop\n",
"1 loop, best of 3: 3.28 s per loop\n",
"1 loop, best of 3: 3.36 s per loop\n"
]
}
],
"source": [
"%timeit cluster.mutual_info_score(labels_true, labels_pred, contingency=c_array)\n",
"%timeit cluster.mutual_info_score(labels_true, labels_pred, contingency=c)\n",
"%timeit cluster.mutual_info_score(labels_true, labels_pred, contingency=csr)\n",
"%timeit cluster.mutual_info_score(labels_true, labels_pred, contingency=csc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment