Skip to content

Instantly share code, notes, and snippets.

@kimiyuki
Last active November 29, 2017 14:20
Show Gist options
  • Save kimiyuki/08c055ca99509dbf9ef8bb525f5798a4 to your computer and use it in GitHub Desktop.
Save kimiyuki/08c055ca99509dbf9ef8bb525f5798a4 to your computer and use it in GitHub Desktop.
F1-simulation.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "n_products = 100\nproducts = [\"x\"+str(i) for i in range(1,n_products+1)]\ndef f1(ans, prd):\n precision = len(prd.intersection(ans))/len(ans)\n recall = len(ans.intersection(prd))/len(prd) \n ret = 0 if precision == 0 or recall == 0 else 2 * (precision * recall)/(precision + recall)\n return (ret, precision, recall)",
"execution_count": 115,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ans = set([\"apple\", \"banana\", \"bread\",\"milk\"])\nprd = set([\"apple\", \"banana\", \"lemon\", \"floor\"])\nf1(ans, prd)",
"execution_count": 125,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 125,
"data": {
"text/plain": "(0.5, 0.5, 0.5)"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "from sklearn.metrics import f1_score\n#f1_score(ans, prd, average=\"macro\")\n#how to deal with different numbers between y_true, y_pred\n#http://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html",
"execution_count": 143,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import numpy as np\ndef rep_n(re_rate):\n actual_bought = np.random.choice(products, int(re_rate * n_products))\n monkey_choice = np.random.choice(products, int(re_rate * n_products))\n return f1(set(actual_bought), set(monkey_choice))",
"execution_count": 134,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ret = np.array([rep_n(re_rate) for _ in range(0,10000)])\nret.mean(axis=0)",
"execution_count": 131,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 131,
"data": {
"text/plain": "array([ 0.23332699, 0.24596639, 0.22278604])"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import itertools\nprint(\"re_rate, f1, precision, recall\")\nfor r in itertools.islice(itertools.count(start=0.01, step=0.01), 60):\n ret = np.array([rep_n(r) for _ in range(0,10000)])\n ret0 = ret.copy()\n ret0[ret0==0] = np.nan\n print(f're_rate:{round(r,2)}, score:{ret.mean(axis=0).round(3)}, score_no_zero:{np.nanmean(ret0, axis=0).round(3)}')",
"execution_count": 142,
"outputs": [
{
"output_type": "stream",
"text": "re_rate, f1, precision, recall\nre_rate:0.01, score:[ 0.011 0.011 0.011], score_no_zero:[ 1. 1. 1.]\nre_rate:0.02, score:[ 0.02 0.02 0.02], score_no_zero:[ 0.505 0.508 0.505]\nre_rate:0.03, score:[ 0.028 0.028 0.028], score_no_zero:[ 0.343 0.344 0.344]\nre_rate:0.04, score:[ 0.039 0.039 0.039], score_no_zero:[ 0.266 0.266 0.266]\nre_rate:0.05, score:[ 0.048 0.048 0.048], score_no_zero:[ 0.221 0.222 0.222]\nre_rate:0.06, score:[ 0.059 0.059 0.059], score_no_zero:[ 0.193 0.193 0.194]\nre_rate:0.07, score:[ 0.068 0.068 0.068], score_no_zero:[ 0.175 0.175 0.176]\nre_rate:0.08, score:[ 0.075 0.076 0.076], score_no_zero:[ 0.162 0.162 0.162]\nre_rate:0.09, score:[ 0.085 0.086 0.086], score_no_zero:[ 0.155 0.155 0.155]\nre_rate:0.1, score:[ 0.096 0.096 0.096], score_no_zero:[ 0.151 0.151 0.151]\nre_rate:0.11, score:[ 0.096 0.096 0.096], score_no_zero:[ 0.151 0.151 0.151]\nre_rate:0.12, score:[ 0.106 0.106 0.106], score_no_zero:[ 0.15 0.15 0.15]\nre_rate:0.13, score:[ 0.114 0.114 0.114], score_no_zero:[ 0.148 0.148 0.148]\nre_rate:0.14, score:[ 0.122 0.122 0.122], score_no_zero:[ 0.15 0.151 0.151]\nre_rate:0.15, score:[ 0.141 0.141 0.141], score_no_zero:[ 0.157 0.158 0.158]\nre_rate:0.16, score:[ 0.149 0.149 0.149], score_no_zero:[ 0.162 0.162 0.162]\nre_rate:0.17, score:[ 0.159 0.159 0.16 ], score_no_zero:[ 0.168 0.168 0.168]\nre_rate:0.18, score:[ 0.164 0.164 0.164], score_no_zero:[ 0.171 0.171 0.171]\nre_rate:0.19, score:[ 0.175 0.175 0.175], score_no_zero:[ 0.179 0.18 0.18 ]\nre_rate:0.2, score:[ 0.181 0.182 0.181], score_no_zero:[ 0.184 0.185 0.185]\nre_rate:0.21, score:[ 0.19 0.19 0.19], score_no_zero:[ 0.192 0.192 0.192]\nre_rate:0.22, score:[ 0.2 0.2 0.2], score_no_zero:[ 0.202 0.202 0.202]\nre_rate:0.23, score:[ 0.206 0.207 0.206], score_no_zero:[ 0.207 0.208 0.207]\nre_rate:0.24, score:[ 0.214 0.214 0.214], score_no_zero:[ 0.215 0.215 0.215]\nre_rate:0.25, score:[ 0.223 0.223 0.223], score_no_zero:[ 0.223 0.223 0.223]\nre_rate:0.26, score:[ 0.23 0.23 0.23], score_no_zero:[ 0.23 0.23 0.23]\nre_rate:0.27, score:[ 0.236 0.236 0.236], score_no_zero:[ 0.236 0.236 0.236]\nre_rate:0.28, score:[ 0.246 0.247 0.246], score_no_zero:[ 0.246 0.247 0.247]\nre_rate:0.29, score:[ 0.252 0.253 0.253], score_no_zero:[ 0.252 0.253 0.253]\nre_rate:0.3, score:[ 0.259 0.26 0.26 ], score_no_zero:[ 0.259 0.26 0.26 ]\nre_rate:0.31, score:[ 0.266 0.267 0.266], score_no_zero:[ 0.266 0.267 0.266]\nre_rate:0.32, score:[ 0.274 0.275 0.275], score_no_zero:[ 0.274 0.275 0.275]\nre_rate:0.33, score:[ 0.283 0.283 0.284], score_no_zero:[ 0.283 0.283 0.284]\nre_rate:0.34, score:[ 0.289 0.29 0.29 ], score_no_zero:[ 0.289 0.29 0.29 ]\nre_rate:0.35, score:[ 0.298 0.299 0.298], score_no_zero:[ 0.298 0.299 0.298]\nre_rate:0.36, score:[ 0.303 0.304 0.304], score_no_zero:[ 0.303 0.304 0.304]\nre_rate:0.37, score:[ 0.309 0.31 0.31 ], score_no_zero:[ 0.309 0.31 0.31 ]\nre_rate:0.38, score:[ 0.317 0.317 0.317], score_no_zero:[ 0.317 0.317 0.317]\nre_rate:0.39, score:[ 0.324 0.325 0.325], score_no_zero:[ 0.324 0.325 0.325]\nre_rate:0.4, score:[ 0.331 0.332 0.332], score_no_zero:[ 0.331 0.332 0.332]\nre_rate:0.41, score:[ 0.337 0.338 0.338], score_no_zero:[ 0.337 0.338 0.338]\nre_rate:0.42, score:[ 0.344 0.344 0.344], score_no_zero:[ 0.344 0.344 0.344]\nre_rate:0.43, score:[ 0.352 0.352 0.352], score_no_zero:[ 0.352 0.352 0.352]\nre_rate:0.44, score:[ 0.357 0.358 0.358], score_no_zero:[ 0.357 0.358 0.358]\nre_rate:0.45, score:[ 0.362 0.363 0.363], score_no_zero:[ 0.362 0.363 0.363]\nre_rate:0.46, score:[ 0.37 0.37 0.37], score_no_zero:[ 0.37 0.37 0.37]\nre_rate:0.47, score:[ 0.375 0.375 0.375], score_no_zero:[ 0.375 0.375 0.375]\nre_rate:0.48, score:[ 0.383 0.383 0.383], score_no_zero:[ 0.383 0.383 0.383]\nre_rate:0.49, score:[ 0.389 0.389 0.389], score_no_zero:[ 0.389 0.389 0.389]\nre_rate:0.5, score:[ 0.394 0.395 0.395], score_no_zero:[ 0.394 0.395 0.395]\nre_rate:0.51, score:[ 0.4 0.401 0.401], score_no_zero:[ 0.4 0.401 0.401]\nre_rate:0.52, score:[ 0.406 0.407 0.407], score_no_zero:[ 0.406 0.407 0.407]\nre_rate:0.53, score:[ 0.413 0.413 0.414], score_no_zero:[ 0.413 0.413 0.414]\nre_rate:0.54, score:[ 0.418 0.419 0.419], score_no_zero:[ 0.418 0.419 0.419]\nre_rate:0.55, score:[ 0.423 0.424 0.424], score_no_zero:[ 0.423 0.424 0.424]\nre_rate:0.56, score:[ 0.429 0.43 0.43 ], score_no_zero:[ 0.429 0.43 0.43 ]\nre_rate:0.57, score:[ 0.435 0.437 0.436], score_no_zero:[ 0.435 0.437 0.436]\nre_rate:0.58, score:[ 0.441 0.442 0.441], score_no_zero:[ 0.441 0.442 0.441]\nre_rate:0.59, score:[ 0.446 0.447 0.447], score_no_zero:[ 0.446 0.447 0.447]\nre_rate:0.6, score:[ 0.452 0.453 0.452], score_no_zero:[ 0.452 0.453 0.452]\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.3",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "08c055ca99509dbf9ef8bb525f5798a4",
"data": {
"description": "F1-simulation.ipynb",
"public": true
}
},
"_draft": {
"nbviewer_url": "https://gist.github.com/08c055ca99509dbf9ef8bb525f5798a4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment