Last active
April 17, 2017 12:25
-
-
Save hiiwave/dc8e6c3d4a535ea07c5bf380fe7826f3 to your computer and use it in GitHub Desktop.
coin_analogy.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false, | |
"ExecuteTime": { | |
"start_time": "2017-04-17T20:26:00.554581", | |
"end_time": "2017-04-17T20:26:00.865721" | |
} | |
}, | |
"cell_type": "code", | |
"source": "import numpy as np\nimport collections\nimport scipy.stats as ss\nnp.random.seed(0)", | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Example 1\nSuppose we have a list of coin-toss results, e.g. ['H' 'T' 'H' 'H' 'H', ...].\n\nTwo models are proposed to describe the mechanism.\n* Model 1: Assume a fair coin ($p_H = 0.5$).\n* Model 2: Assume an unfair coin, where 'H' and 'T' may have different probabilities; the probability of 'H' is estimated as the fraction of 'H' in the training samples.\n\nFor both models we calculate and compare the reproducing likelihood (the geometric-mean likelihood per toss on the training samples) and the predicting likelihood (the same quantity on the held-out test samples)." | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false, | |
"ExecuteTime": { | |
"start_time": "2017-04-17T20:26:00.866952", | |
"end_time": "2017-04-17T20:26:00.872952" | |
} | |
}, | |
"cell_type": "code", | |
"source": "def gen_synthetic(p_head, n_train, n_test):\n x_all = np.random.choice(['H', 'T'], n_train + n_test, p=[p_head, 1 - p_head])\n xtrain = x_all[:n_train]\n xtest = x_all[n_train:]\n return xtrain, xtest\n\nxtrain, xtest = gen_synthetic(0.7, 75, 25)\nprint(xtrain[:5])\nprint(xtest[:5])", | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "['H' 'T' 'H' 'H' 'H']\n['H' 'H' 'H' 'H' 'H']\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false, | |
"ExecuteTime": { | |
"start_time": "2017-04-17T20:26:00.874928", | |
"end_time": "2017-04-17T20:26:01.112585" | |
} | |
}, | |
"cell_type": "code", | |
"source": "def eval_model(samples, p_head_est):\n probs = np.where(samples == 'H', p_head_est, 1 - p_head_est)\n return ss.mstats.gmean(probs)\n\n# Model 1\np_model1 = 0.5\nlikelihood_trn = eval_model(xtrain, p_model1)\nlikelihood_tst = eval_model(xtest , p_model1)\nprint (\"Model 1 with p_head_est {:.4f} has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model1, likelihood_trn, likelihood_tst))\n\n# Model 2\np_model2 = collections.Counter(xtrain)['H'] / len(xtrain)\nlikelihood_trn = eval_model(xtrain, p_model2)\nlikelihood_tst = eval_model(xtest , p_model2)\nprint (\"Model 2 with p_head_est {:.4f} has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model2, likelihood_trn, likelihood_tst))\n", | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "Model 1 with p_head_est 0.5000 has avg reproducing likelihood 0.5000 and avg predicting likelihood 0.5000\nModel 2 with p_head_est 0.7333 has avg reproducing likelihood 0.5599 and avg predicting likelihood 0.6495\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Example 2\nSuppose we have a list of paired coin-toss results. \nIn each round the coin is thrown twice and the two outcomes are recorded as a pair,\ne.g. [['H' 'H'], ['T' 'T'], ['H' 'H'], ['H' 'H'], ['H' 'T'], ...].\n\nThree models are proposed to describe the mechanism.\n* Model 1: Assume a fair coin.\n* Model 2: Assume an unfair coin with probability $p_H$ of getting 'H', the same for both throws of a round.\n* Model 3: Assume an unfair coin with throw-dependent probabilities $p_{H1}$ and $p_{H2}$ of getting 'H' on the first and second throw of each round.\n\nThe reproducing likelihood (on the training pairs) and the predicting likelihood (on the held-out test pairs) of these three models are calculated and compared. Note that here each likelihood is a geometric mean per pair rather than per single throw, so the fair-coin baseline is $0.5^2 = 0.25$." | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false, | |
"ExecuteTime": { | |
"start_time": "2017-04-17T20:26:01.115215", | |
"end_time": "2017-04-17T20:26:01.378901" | |
} | |
}, | |
"cell_type": "code", | |
"source": "def gen_synthetic(p_head, n_train, n_test):\n x_all = np.random.choice(['H', 'T'], 2 * (n_train + n_test), p=[p_head, 1 - p_head])\n x_all = x_all.reshape((-1, 2))\n # x_all = [tuple(x) for x in x_all]\n xtrain = x_all[:n_train]\n xtest = x_all[n_train:]\n return xtrain, xtest\n\nxtrain, xtest = gen_synthetic(0.7, 75, 30)\nprint(xtrain[:5])\nprint(xtest[:5])", | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "[['H' 'H']\n ['T' 'T']\n ['H' 'H']\n ['H' 'H']\n ['H' 'T']]\n[['H' 'T']\n ['T' 'T']\n ['T' 'H']\n ['H' 'H']\n ['T' 'H']]\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": false, | |
"ExecuteTime": { | |
"start_time": "2017-04-17T20:26:01.381313", | |
"end_time": "2017-04-17T20:26:01.663465" | |
} | |
}, | |
"cell_type": "code", | |
"source": "def eval_model(samples, p_head_est_r1, p_head_est_r2=None):\n if p_head_est_r2 is None:\n p_head_est_r2 = p_head_est_r1\n prob = 1.0\n for idx, x in np.ndenumerate(samples):\n # print (idx, x)\n if idx[1] == 0:\n prob *= p_head_est_r1 if x == 'H' else 1 - p_head_est_r1\n else:\n prob *= p_head_est_r2 if x == 'H' else 1 - p_head_est_r2\n return pow(prob, 1 / len(samples))\n\n# Model 1\np_model1 = 0.5\nlikelihood_trn = eval_model(xtrain, p_model1)\nlikelihood_tst = eval_model(xtest, p_model1)\nprint (\"Model 1 with p_head_est {:.4f} has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model1, likelihood_trn, likelihood_tst))\n\n# Model 2\np_model2 = collections.Counter(xtrain.flatten())['H'] / (len(xtrain) * 2)\nlikelihood_trn = eval_model(xtrain, p_model2)\nlikelihood_tst = eval_model(xtest, p_model2)\nprint (\"Model 2 with p_head_est {:.4f} has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model2, likelihood_trn, likelihood_tst))\n\n# Model 3\np_model3_r1 = collections.Counter(xtrain[:, 0])['H'] / len(xtrain)\np_model3_r2 = collections.Counter(xtrain[:, 1])['H'] / len(xtrain)\nlikelihood_trn = eval_model(xtrain, p_model3_r1, p_model3_r2)\nlikelihood_tst = eval_model(xtest, p_model3_r1, p_model3_r2)\nprint (\"Model 3 with p_head_est [{:.4f}, {:.4f}] has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model3_r1, p_model3_r2, likelihood_trn, likelihood_tst))", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "Model 1 with p_head_est 0.5000 has avg reproducing likelihood 0.2500 and avg predicting likelihood 0.2500\nModel 2 with p_head_est 0.7200 has avg reproducing likelihood 0.3055 and avg predicting likelihood 0.2513\nModel 3 with p_head_est [0.7067, 0.7333] has avg reproducing likelihood 0.3057 and avg predicting likelihood 0.2494\n", | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true, | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"gist": { | |
"id": "dc8e6c3d4a535ea07c5bf380fe7826f3", | |
"data": { | |
"description": "coin_analogy.ipynb", | |
"public": false | |
} | |
}, | |
"kernelspec": { | |
"name": "conda-env-tensor3-py", | |
"display_name": "Python [conda env:tensor3]", | |
"language": "python" | |
}, | |
"toc": { | |
"threshold": 4, | |
"number_sections": false, | |
"toc_cell": false, | |
"toc_window_display": false, | |
"toc_section_display": "block", | |
"sideBar": true, | |
"navigate_menu": true, | |
"moveMenuLeft": true, | |
"colors": { | |
"hover_highlight": "#DAA520", | |
"selected_highlight": "#FFD700", | |
"running_highlight": "#FF0000" | |
}, | |
"nav_menu": { | |
"height": "47px", | |
"width": "254px" | |
} | |
}, | |
"language_info": { | |
"name": "python", | |
"file_extension": ".py", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"mimetype": "text/x-python", | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"version": "3.5.2" | |
}, | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/dc8e6c3d4a535ea07c5bf380fe7826f3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment