{
"cells": [
{
"metadata": {
"trusted": true,
"collapsed": false,
"ExecuteTime": {
"start_time": "2017-04-17T20:26:00.554581",
"end_time": "2017-04-17T20:26:00.865721"
}
},
"cell_type": "code",
"source": "import numpy as np\nimport collections\nimport scipy.stats as ss\nnp.random.seed(0)",
"execution_count": 1,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Example 1\nConsider we have a list of coin throwing result. e.g. ['H' 'T' 'H' 'H' 'H', ...].\n\nTwo models are proposed to model the mechanism.\n* Model 1: Assuming a fair coin\n* Model 2: Consider unfair coin with different probability of throwing a 'H' and 'T' \n\nThe reproducing likelihood and predicting likelihood of both models are calculated and compared."
},
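{
"metadata": {},
"cell_type": "markdown",
"source": "A minimal sketch of the likelihood measure used below, assuming it is the geometric mean of the per-toss probabilities a model assigns (i.e. the exponential of the mean log-probability), which is what `eval_model` later computes via `scipy.stats.mstats.gmean`. The helper name `avg_likelihood` and the toy data are illustrative only."
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "# Sketch (assumption): the \"average likelihood\" of a sample under a model is taken\n# to be the geometric mean of the per-toss probabilities, i.e. exp(mean(log p_i)).\n# The helper name and the toy data are illustrative only.\nimport numpy as np\n\ndef avg_likelihood(samples, p_head):\n    # Probability the model assigns to each observed toss\n    probs = np.where(samples == 'H', p_head, 1 - p_head)\n    # Geometric mean, written as the exponential of the mean log-probability\n    return np.exp(np.mean(np.log(probs)))\n\ntoy = np.array(['H', 'T', 'H', 'H', 'H'])\nprint(avg_likelihood(toy, 0.5))  # fair coin\nprint(avg_likelihood(toy, 0.8))  # head-biased coin",
"execution_count": null,
"outputs": []
},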
{
"metadata": {
"trusted": true,
"collapsed": false,
"ExecuteTime": {
"start_time": "2017-04-17T20:26:00.866952",
"end_time": "2017-04-17T20:26:00.872952"
}
},
"cell_type": "code",
"source": "def gen_synthetic(p_head, n_train, n_test):\n x_all = np.random.choice(['H', 'T'], n_train + n_test, p=[p_head, 1 - p_head])\n xtrain = x_all[:n_train]\n xtest = x_all[n_train:]\n return xtrain, xtest\n\nxtrain, xtest = gen_synthetic(0.7, 75, 25)\nprint(xtrain[:5])\nprint(xtest[:5])",
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": "['H' 'T' 'H' 'H' 'H']\n['H' 'H' 'H' 'H' 'H']\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false,
"ExecuteTime": {
"start_time": "2017-04-17T20:26:00.874928",
"end_time": "2017-04-17T20:26:01.112585"
}
},
"cell_type": "code",
"source": "def eval_model(samples, p_head_est):\n probs = np.where(samples == 'H', p_head_est, 1 - p_head_est)\n return ss.mstats.gmean(probs)\n\n# Model 1\np_model1 = 0.5\nlikelihood_trn = eval_model(xtrain, p_model1)\nlikelihood_tst = eval_model(xtest , p_model1)\nprint (\"Model 1 with p_head_est {:.4f} has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model1, likelihood_trn, likelihood_tst))\n\n# Model 2\np_model2 = collections.Counter(xtrain)['H'] / len(xtrain)\nlikelihood_trn = eval_model(xtrain, p_model2)\nlikelihood_tst = eval_model(xtest , p_model2)\nprint (\"Model 2 with p_head_est {:.4f} has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model2, likelihood_trn, likelihood_tst))\n",
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": "Model 1 with p_head_est 0.5000 has avg reproducing likelihood 0.5000 and avg predicting likelihood 0.5000\nModel 2 with p_head_est 0.7333 has avg reproducing likelihood 0.5599 and avg predicting likelihood 0.6495\n",
"name": "stdout"
}
]
},
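{
"metadata": {},
"cell_type": "markdown",
"source": "Model 2's head-probability estimate is the fraction of heads in the training data, which is the maximum-likelihood estimate for this Bernoulli model, so its reproducing likelihood can never fall below Model 1's. A quick numerical check follows as a sketch; the grid resolution and variable names are illustrative, and it reuses `xtrain` and the `eval_model` defined above."
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "# Sketch: check numerically that the training head fraction (Model 2's estimate)\n# maximizes the reproducing likelihood over a coarse grid of candidate head\n# probabilities. The grid resolution is illustrative only.\nimport numpy as np\n\ngrid = np.linspace(0.01, 0.99, 99)\ntrain_lik = [eval_model(xtrain, p) for p in grid]\nbest_p = grid[int(np.argmax(train_lik))]\nhead_fraction = np.mean(xtrain == 'H')\nprint('grid maximizer: {:.2f}, training head fraction: {:.4f}'.format(best_p, head_fraction))",
"execution_count": null,
"outputs": []
},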
{
"metadata": {},
"cell_type": "markdown",
"source": "## Example 2\nConsider we have a list of coin throwing pair result. \nEach round the coin is threw 2 times and recorded as a pair.\ne.g. [['H' 'H'], ['T' 'T'], ['H' 'H'], ['H' 'H'], ['H' 'T'], ...].\n\nThree models are proposed to model the mechanism.\n* Model 1: Assuming a fair coin\n* Model 2: Consider an unfair coin with probability $p_H$ of getting 'H', and believing the probability is independent to round.\n* Model 3: Consider an unfair coin with round-sensitive probability $p_{H1}$ and $p_{H2}$ of getting 'H' at the first and second round.\n\nThe reproducing likelihood and predicting likelihood of these three models are calculated and compared."
},
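{
"metadata": {},
"cell_type": "markdown",
"source": "Under all three models the two tosses in a pair are treated as independent, so the probability of a pair is the product of the two per-toss probabilities; the models differ only in whether the two toss positions share one head probability. A minimal sketch, with illustrative parameter values and helper name `pair_prob`:"
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "# Sketch (assumption): probability a model assigns to a single pair, treating the\n# two tosses as independent. Parameter values and the helper name are illustrative.\ndef pair_prob(pair, p_h1, p_h2=None):\n    if p_h2 is None:  # Models 1 and 2: one head probability shared by both tosses\n        p_h2 = p_h1\n    p1 = p_h1 if pair[0] == 'H' else 1 - p_h1\n    p2 = p_h2 if pair[1] == 'H' else 1 - p_h2\n    return p1 * p2\n\nprint(pair_prob(['H', 'T'], 0.5))        # Model 1: fair coin\nprint(pair_prob(['H', 'T'], 0.7))        # Model 2: one shared bias\nprint(pair_prob(['H', 'T'], 0.7, 0.6))   # Model 3: toss-position-specific bias",
"execution_count": null,
"outputs": []
},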
{
"metadata": {
"trusted": true,
"collapsed": false,
"ExecuteTime": {
"start_time": "2017-04-17T20:26:01.115215",
"end_time": "2017-04-17T20:26:01.378901"
}
},
"cell_type": "code",
"source": "def gen_synthetic(p_head, n_train, n_test):\n x_all = np.random.choice(['H', 'T'], 2 * (n_train + n_test), p=[p_head, 1 - p_head])\n x_all = x_all.reshape((-1, 2))\n # x_all = [tuple(x) for x in x_all]\n xtrain = x_all[:n_train]\n xtest = x_all[n_train:]\n return xtrain, xtest\n\nxtrain, xtest = gen_synthetic(0.7, 75, 30)\nprint(xtrain[:5])\nprint(xtest[:5])",
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": "[['H' 'H']\n ['T' 'T']\n ['H' 'H']\n ['H' 'H']\n ['H' 'T']]\n[['H' 'T']\n ['T' 'T']\n ['T' 'H']\n ['H' 'H']\n ['T' 'H']]\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false,
"ExecuteTime": {
"start_time": "2017-04-17T20:26:01.381313",
"end_time": "2017-04-17T20:26:01.663465"
}
},
"cell_type": "code",
"source": "def eval_model(samples, p_head_est_r1, p_head_est_r2=None):\n if p_head_est_r2 is None:\n p_head_est_r2 = p_head_est_r1\n prob = 1.0\n for idx, x in np.ndenumerate(samples):\n # print (idx, x)\n if idx[1] == 0:\n prob *= p_head_est_r1 if x == 'H' else 1 - p_head_est_r1\n else:\n prob *= p_head_est_r2 if x == 'H' else 1 - p_head_est_r2\n return pow(prob, 1 / len(samples))\n\n# Model 1\np_model1 = 0.5\nlikelihood_trn = eval_model(xtrain, p_model1)\nlikelihood_tst = eval_model(xtest, p_model1)\nprint (\"Model 1 with p_head_est {:.4f} has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model1, likelihood_trn, likelihood_tst))\n\n# Model 2\np_model2 = collections.Counter(xtrain.flatten())['H'] / (len(xtrain) * 2)\nlikelihood_trn = eval_model(xtrain, p_model2)\nlikelihood_tst = eval_model(xtest, p_model2)\nprint (\"Model 2 with p_head_est {:.4f} has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model2, likelihood_trn, likelihood_tst))\n\n# Model 3\np_model3_r1 = collections.Counter(xtrain[:, 0])['H'] / len(xtrain)\np_model3_r2 = collections.Counter(xtrain[:, 1])['H'] / len(xtrain)\nlikelihood_trn = eval_model(xtrain, p_model3_r1, p_model3_r2)\nlikelihood_tst = eval_model(xtest, p_model3_r1, p_model3_r2)\nprint (\"Model 3 with p_head_est [{:.4f}, {:.4f}] has avg reproducing likelihood {:.4f} and avg predicting likelihood {:.4f}\".format(p_model3_r1, p_model3_r2, likelihood_trn, likelihood_tst))",
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": "Model 1 with p_head_est 0.5000 has avg reproducing likelihood 0.2500 and avg predicting likelihood 0.2500\nModel 2 with p_head_est 0.7200 has avg reproducing likelihood 0.3055 and avg predicting likelihood 0.2513\nModel 3 with p_head_est [0.7067, 0.7333] has avg reproducing likelihood 0.3057 and avg predicting likelihood 0.2494\n",
"name": "stdout"
}
]
},
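{
"metadata": {},
"cell_type": "markdown",
"source": "The per-element loop in `eval_model` above can also be written in vectorized NumPy form. A minimal sketch of an equivalent (not the original) implementation, reusing `xtrain`, `p_model3_r1`, and `p_model3_r2` from above:"
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "# Sketch: a vectorized equivalent of the pair-wise eval_model above. The geometric\n# mean of the per-pair probabilities matches pow(prob, 1 / len(samples)) in the\n# loop version.\nimport numpy as np\nimport scipy.stats as ss\n\ndef eval_model_vec(samples, p_head_est_r1, p_head_est_r2=None):\n    if p_head_est_r2 is None:\n        p_head_est_r2 = p_head_est_r1\n    probs_r1 = np.where(samples[:, 0] == 'H', p_head_est_r1, 1 - p_head_est_r1)\n    probs_r2 = np.where(samples[:, 1] == 'H', p_head_est_r2, 1 - p_head_est_r2)\n    return ss.mstats.gmean(probs_r1 * probs_r2)\n\nprint(eval_model_vec(xtrain, 0.5))                       # should match Model 1's value above\nprint(eval_model_vec(xtrain, p_model3_r1, p_model3_r2))  # should match Model 3's value above",
"execution_count": null,
"outputs": []
},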
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"anaconda-cloud": {},
"gist": {
"id": "dc8e6c3d4a535ea07c5bf380fe7826f3",
"data": {
"description": "coin_analogy.ipynb",
"public": false
}
},
"kernelspec": {
"name": "conda-env-tensor3-py",
"display_name": "Python [conda env:tensor3]",
"language": "python"
},
"toc": {
"threshold": 4,
"number_sections": false,
"toc_cell": false,
"toc_window_display": false,
"toc_section_display": "block",
"sideBar": true,
"navigate_menu": true,
"moveMenuLeft": true,
"colors": {
"hover_highlight": "#DAA520",
"selected_highlight": "#FFD700",
"running_highlight": "#FF0000"
},
"nav_menu": {
"height": "47px",
"width": "254px"
}
},
"language_info": {
"name": "python",
"file_extension": ".py",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"mimetype": "text/x-python",
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"version": "3.5.2"
},
"_draft": {
"nbviewer_url": "https://gist.github.com/dc8e6c3d4a535ea07c5bf380fe7826f3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}