cdetrio/truthcoin event outcomes by SVD

## truthcoin event outcomes by SVD
{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd\n",
      "import numpy as np\n",
      "import scipy.stats\n",
      "from __future__ import division\n",
      "from sklearn import datasets\n",
      "from sklearn.decomposition import FactorAnalysis\n",
      "from sklearn.decomposition import PCA\n",
      "from sklearn.decomposition import KernelPCA\n",
      "%pylab inline\n",
      "\n",
      "import warnings\n",
      "warnings.filterwarnings(\"ignore\", category=DeprecationWarning) # annoying pandas bug"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Populating the interactive namespace from numpy and matplotlib\n"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# TruthCoin\n",
      "**  https://github.com/psztorc/Truthcoin **\n",
      "\n",
      "## A simplified example of multi-event resolution\n",
      "  * no reputation / stake. uniform vote weights\n",
      "  * only binary events / discrete outcomes."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# events are columns of the voter matrix:\n",
      "#   col[0] = Obama is the u.s. president (2014),\n",
      "#   col[1] = Brazil won the 2014 fifa world cup,\n",
      "#   col[2] = Djokovic won the 2014 wimbledon tennis championship,\n",
      "#   col[3] = MtGox exchange goes insolvent (1Q 2014)\n",
      "#   col[4] = Professor bitcorn won his bet (\"I predict that Bitcoin will trade for under $10 a share by the first half of 2014\")\n",
      "#\n",
      "\n",
      "VoterMatrix = np.matrix([\n",
      "           [1, 0, 1, 1, 0], # first voter\n",
      "           [1, 1, 1, 1, 0], # ignorant about sports\n",
      "           [1, 0, 1, 1, 0],\n",
      "           [0, 0, 1, 1, 0], # republican in denial\n",
      "           [1, 0, 1, 1, 1]]) # prof bitcorn\n",
      "\n",
      "print VoterMatrix\n",
      "\n",
      "features = ['outcome_1', 'outcome_2', 'outcome_3', 'outcome_4', 'outcome_5']\n",
      "voteMatrix_pd = pd.DataFrame(VoterMatrix, columns=features)\n",
      "#raw['class'] = y\n",
      "print ' voteMatrix_pd:'\n",
      "print voteMatrix_pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "[[1 0 1 1 0]\n",
        " [1 1 1 1 0]\n",
        " [1 0 1 1 0]\n",
        " [0 0 1 1 0]\n",
        " [1 0 1 1 1]]\n",
        " voteMatrix_pd:\n",
        "   outcome_1  outcome_2  outcome_3  outcome_4  outcome_5\n",
        "0          1          0          1          1          0\n",
        "1          1          1          1          1          0\n",
        "2          1          0          1          1          0\n",
        "3          0          0          1          1          0\n",
        "4          1          0          1          1          1\n",
        "\n",
        "[5 rows x 5 columns]\n"
       ]
      }
     ],
     "prompt_number": 21
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Simple (non-SVD) event resolution\n",
      "  * using a uniform reputation / voter weighting"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def reWeight(Vec):\n",
      "    \"\"\"Get the relative influence of numbers, treat NaN as influence-less.\"\"\"\n",
      "    vec2 = np.array(Vec, dtype=float)\n",
      "    for i in range(len(Vec)):\n",
      "        if isnan(Vec[i]):\n",
      "            vec2[i] = 0\n",
      "\n",
      "    vec2sum = np.sum(vec2)\n",
      "    for i in range(len(vec2)):\n",
      "        vec2[i] = vec2[i] / vec2sum\n",
      "\n",
      "    return(vec2)\n",
      "\n",
      "\n",
      "rew = reWeight(np.array([1,1,1,1]))\n",
      "print \"reweighted vector test. uniform vector\", rew\n",
      "\n",
      "def getWeight(Vec, AddMean=0):\n",
      "    \"\"\"Takes an array (vector in practice), and returns proportional distance from zero.\"\"\"\n",
      "    New = abs(Vec)       #Absolute Value\n",
      "    if AddMean == 1:     #Add the mean to each element of the vector\n",
      "        New = New + mean(New)\n",
      "    if sum(New) == 0:    #Catch an error here\n",
      "        New = New + 1\n",
      "    New = New/sum(New)   #Normalize\n",
      "    return(New)\n",
      "\n",
      "\n",
      "uniformWeight = array([[1]]*len(VoterMatrix))\n",
      "print \"\\nuniform weights:\\n\", uniformWeight\n",
      "uniformReputation = getWeight(uniformWeight)\n",
      "print \"\\nuniform reputation:\\n\", uniformReputation\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "reweighted vector test. uniform vector [ 0.25  0.25  0.25  0.25]\n",
        "\n",
        "uniform weights:\n",
        "[[1]\n",
        " [1]\n",
        " [1]\n",
        " [1]\n",
        " [1]]\n",
        "\n",
        "uniform reputation:\n",
        "[[ 0.2]\n",
        " [ 0.2]\n",
        " [ 0.2]\n",
        " [ 0.2]\n",
        " [ 0.2]]\n"
       ]
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Weigh votes and resolve decisions.\n",
      "measure each decision by taking a dot product. essentially this just uses the average vote value among all voters. \n",
      "\n",
      "* an SVD result would change the reputation vector, but without SVD its a simple uniform vector (all votes equal)."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# port of GetDecisionOutcomes()  https://github.com/psztorc/Truthcoin/blob/master/lib/consensus/ConsensusMechanism.r#L139\n",
      "\n",
      "# VoterMatrix\n",
      "# we're using the regular matrix here. data is not even zero-centered.\n",
      "\n",
      "MaskedVoterMatrix = np.ma.masked_array(VoterMatrix, isnan(VoterMatrix))\n",
      "matrix_mask_thingie = -MaskedVoterMatrix[...,0].mask\n",
      "# not sure what the mask is for.\n",
      "# corresponds to https://github.com/psztorc/Truthcoin/blob/master/pylib/consensus/consensus.py#L113-L114\n",
      "row = reWeight( rep [ matrix_mask_thingie ] )\n",
      "print \"row:\", row\n",
      "col = MaskedVoterMatrix[matrix_mask_thingie, 0]\n",
      "print \"col:\", col\n",
      "\n",
      "\n",
      "decisions = []\n",
      "for i in range(VoterMatrix.shape[1]):\n",
      "    row = reWeight( rep [ -MaskedVoterMatrix[...,i].mask ] )\n",
      "    col = MaskedVoterMatrix[ -MaskedVoterMatrix[...,i].mask, i]\n",
      "    col = np.array(col, dtype=float)\n",
      "    row = np.transpose(row)[0]\n",
      "    decisions.append(np.dot(col, row))\n",
      "    \n",
      "print \"\\ndecisions:\"\n",
      "print decisions\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "row: [[ 0.2]\n",
        " [ 0.2]\n",
        " [ 0.2]\n",
        " [ 0.2]\n",
        " [ 0.2]]\n",
        "col: [[1 1 1 0 1]]\n",
        "\n",
        "decisions:\n",
        "[0.80000000000000004, 0.20000000000000001, 1.0, 1.0, 0.20000000000000001]\n"
       ]
      }
     ],
     "prompt_number": 11
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "* **that's the output for a simplified multi-decision resolution from votes on binary outcomes.**\n",
      "  - map values between [0,1] to one of {0, 0.5, 1}\n",
      "* **in this simplified version there is no reputation or vote stake amounts. every vote is equal ([0.2, 0.2, 0.2, 0.2, 0.2])**\n",
      "\n",
      "* an extended method would incorporate vote stake/deposit amounts to weigh votes. if we also add scaled/continous outcomes then the resulting consensus method would be a form of multi-decision SchellingCoin (or equivalently, TruthCoin without reputation)."
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# SVD for Reputation based voting\n",
      "* SVD operates on a covariance matrix. covariance calc needs data matrix of normalized continuous values"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# normalize each feature/column to mean = 0, std = 1\n",
      "# data matrix needs to be normalized to get covariance and SVD\n",
      "normed = voteMatrix_pd.copy()\n",
      "for col in features:\n",
      "    #normed[col] = normed[col].apply(lambda x: (x - normed[col].mean()) / normed[col].std())\n",
      "    normed[col] = normed[col].apply(lambda x: (x - normed[col].mean()))\n",
      "\n",
      "# normed data matrix is only zero-centered (not auto-scaled)\n",
      "print '\\nNormalized dataset:'\n",
      "print normed[:5]\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "Normalized dataset:\n",
        "   outcome_1  outcome_2  outcome_3  outcome_4  outcome_5\n",
        "0        0.2       -0.2          0          0       -0.2\n",
        "1        0.2        0.8          0          0       -0.2\n",
        "2        0.2       -0.2          0          0       -0.2\n",
        "3       -0.8       -0.2          0          0       -0.2\n",
        "4        0.2       -0.2          0          0        0.8\n",
        "\n",
        "[5 rows x 5 columns]\n"
       ]
      }
     ],
     "prompt_number": 23
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "this covariance calc is from the [original pca example](http://nbviewer.ipython.org/github/tmsquasher/data-science-notebooks/blob/master/PCA%20Basics.ipynb)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "### since our data is already normalized, cov(x1, x2) = sum(x1*x2) / num_observations\n",
      "#   ^^ old assumption from the original example. valid here??\n",
      "cov_df = pd.DataFrame(index=features)\n",
      "for colA in features:\n",
      "    column = []\n",
      "    for colB in features:\n",
      "        cov = normed[colA].cov(normed[colB])\n",
      "        column.append(cov)\n",
      "    cov_df[colA] = column\n",
      "\n",
      "print 'Covariance matrix:'\n",
      "print cov_df\n",
      "# everybody agrees on outcomes 3 & 4 (tennis winner, mtgox solvency), so those columns have zero variance"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Covariance matrix:\n",
        "           outcome_1  outcome_2  outcome_3  outcome_4  outcome_5\n",
        "outcome_1       0.20       0.05          0          0       0.05\n",
        "outcome_2       0.05       0.20          0          0      -0.05\n",
        "outcome_3       0.00       0.00          0          0       0.00\n",
        "outcome_4       0.00       0.00          0          0       0.00\n",
        "outcome_5       0.05      -0.05          0          0       0.20\n",
        "\n",
        "[5 rows x 5 columns]\n"
       ]
      }
     ],
     "prompt_number": 24
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## 3. Singular Value Decomposition (SVD)\n",
      "\n",
      "** [U]: Rows are the original features and columns are the PCA 'components'. Each cell gives the 'loading' of the feature on the corresponding component. **\n",
      "\n",
      "** [S]: Represents how much variance is explained by each component. **"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# use numpy's SVD implementation\n",
      "u, s, v = scipy.linalg.svd(cov_df)\n",
      "print 'U: (feature loading for each component)'\n",
      "print pd.DataFrame(u, index=features) # first loading\n",
      "print '\\nExplained variance:\\n', s\n",
      "\n",
      "firstScore = np.transpose(np.dot(cov_df, u))[0]\n",
      "print \"\\nfirstScore:\"\n",
      "print firstScore\n",
      "\n",
      "Set1 = firstScore + abs(min(firstScore))\n",
      "print \"\\nSet1:\"\n",
      "print Set1\n",
      "Set2 = firstScore - max(firstScore)\n",
      "print \"Set2:\"\n",
      "print Set2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "U: (feature loading for each component)\n",
        "                  0             1        2  3  4\n",
        "outcome_1 -0.816497 -8.088567e-17  0.57735  0  0\n",
        "outcome_2 -0.408248 -7.071068e-01 -0.57735  0  0\n",
        "outcome_3  0.000000  0.000000e+00  0.00000  1  0\n",
        "outcome_4  0.000000  0.000000e+00  0.00000  0  1\n",
        "outcome_5 -0.408248  7.071068e-01 -0.57735  0  0\n",
        "\n",
        "[5 rows x 5 columns]\n",
        "\n",
        "Explained variance:\n",
        "[ 0.25  0.25  0.1   0.    0.  ]\n",
        "\n",
        "firstScore:\n",
        "[-0.20412415 -0.10206207  0.          0.         -0.10206207]\n",
        "\n",
        "Set1:\n",
        "[ 0.          0.10206207  0.20412415  0.20412415  0.10206207]\n",
        "Set2:\n",
        "[-0.20412415 -0.10206207  0.          0.         -0.10206207]\n"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# note on these two sets: https://github.com/psztorc/Truthcoin/blob/master/lib/consensus/ConsensusMechanism.r#L40-L51\n",
      "New1 = getWeight(np.dot(Set1, voteMatrix_pd))\n",
      "print \"\\nNew1:\"\n",
      "print New1\n",
      "New2 = getWeight(np.dot(Set2, voteMatrix_pd))\n",
      "print \"New2:\"\n",
      "print New2\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "New1:\n",
        "[ 0.22222222  0.05555556  0.33333333  0.33333333  0.05555556]\n",
        "New2:\n",
        "[ 0.28571429  0.07142857  0.28571429  0.28571429  0.07142857]\n"
       ]
      }
     ],
     "prompt_number": 10
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### incomplete. more calcs follow for adjusting voter reputations."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}
	{
	"metadata": {
	"name": ""
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"import pandas as pd\n",
	"import numpy as np\n",
	"import scipy.stats\n",
	"from __future__ import division\n",
	"from sklearn import datasets\n",
	"from sklearn.decomposition import FactorAnalysis\n",
	"from sklearn.decomposition import PCA\n",
	"from sklearn.decomposition import KernelPCA\n",
	"%pylab inline\n",
	"\n",
	"import warnings\n",
	"warnings.filterwarnings(\"ignore\", category=DeprecationWarning) # annoying pandas bug"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"Populating the interactive namespace from numpy and matplotlib\n"
	]
	}
	],
	"prompt_number": 2
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# TruthCoin\n",
	" https://github.com/psztorc/Truthcoin \n",
	"\n",
	"## A simplified example of multi-event resolution\n",
	" * no reputation / stake. uniform vote weights\n",
	" * only binary events / discrete outcomes."
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# events are columns of the voter matrix:\n",
	"# col[0] = Obama is the u.s. president (2014),\n",
	"# col[1] = Brazil won the 2014 fifa world cup,\n",
	"# col[2] = Djokovic won the 2014 wimbledon tennis championship,\n",
	"# col[3] = MtGox exchange goes insolvent (1Q 2014)\n",
	"# col[4] = Professor bitcorn won his bet (\"I predict that Bitcoin will trade for under $10 a share by the first half of 2014\")\n",
	"#\n",
	"\n",
	"VoterMatrix = np.matrix([\n",
	" [1, 0, 1, 1, 0], # first voter\n",
	" [1, 1, 1, 1, 0], # ignorant about sports\n",
	" [1, 0, 1, 1, 0],\n",
	" [0, 0, 1, 1, 0], # republican in denial\n",
	" [1, 0, 1, 1, 1]]) # prof bitcorn\n",
	"\n",
	"print VoterMatrix\n",
	"\n",
	"features = ['outcome_1', 'outcome_2', 'outcome_3', 'outcome_4', 'outcome_5']\n",
	"voteMatrix_pd = pd.DataFrame(VoterMatrix, columns=features)\n",
	"#raw['class'] = y\n",
	"print ' voteMatrix_pd:'\n",
	"print voteMatrix_pd"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"[[1 0 1 1 0]\n",
	" [1 1 1 1 0]\n",
	" [1 0 1 1 0]\n",
	" [0 0 1 1 0]\n",
	" [1 0 1 1 1]]\n",
	" voteMatrix_pd:\n",
	" outcome_1 outcome_2 outcome_3 outcome_4 outcome_5\n",
	"0 1 0 1 1 0\n",
	"1 1 1 1 1 0\n",
	"2 1 0 1 1 0\n",
	"3 0 0 1 1 0\n",
	"4 1 0 1 1 1\n",
	"\n",
	"[5 rows x 5 columns]\n"
	]
	}
	],
	"prompt_number": 21
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Simple (non-SVD) event resolution\n",
	" * using a uniform reputation / voter weighting"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"def reWeight(Vec):\n",
	" \"\"\"Get the relative influence of numbers, treat NaN as influence-less.\"\"\"\n",
	" vec2 = np.array(Vec, dtype=float)\n",
	" for i in range(len(Vec)):\n",
	" if isnan(Vec[i]):\n",
	" vec2[i] = 0\n",
	"\n",
	" vec2sum = np.sum(vec2)\n",
	" for i in range(len(vec2)):\n",
	" vec2[i] = vec2[i] / vec2sum\n",
	"\n",
	" return(vec2)\n",
	"\n",
	"\n",
	"rew = reWeight(np.array([1,1,1,1]))\n",
	"print \"reweighted vector test. uniform vector\", rew\n",
	"\n",
	"def getWeight(Vec, AddMean=0):\n",
	" \"\"\"Takes an array (vector in practice), and returns proportional distance from zero.\"\"\"\n",
	" New = abs(Vec) #Absolute Value\n",
	" if AddMean == 1: #Add the mean to each element of the vector\n",
	" New = New + mean(New)\n",
	" if sum(New) == 0: #Catch an error here\n",
	" New = New + 1\n",
	" New = New/sum(New) #Normalize\n",
	" return(New)\n",
	"\n",
	"\n",
	"uniformWeight = array([[1]]*len(VoterMatrix))\n",
	"print \"\\nuniform weights:\\n\", uniformWeight\n",
	"uniformReputation = getWeight(uniformWeight)\n",
	"print \"\\nuniform reputation:\\n\", uniformReputation\n"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"reweighted vector test. uniform vector [ 0.25 0.25 0.25 0.25]\n",
	"\n",
	"uniform weights:\n",
	"[[1]\n",
	" [1]\n",
	" [1]\n",
	" [1]\n",
	" [1]]\n",
	"\n",
	"uniform reputation:\n",
	"[[ 0.2]\n",
	" [ 0.2]\n",
	" [ 0.2]\n",
	" [ 0.2]\n",
	" [ 0.2]]\n"
	]
	}
	],
	"prompt_number": 16
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Weigh votes and resolve decisions.\n",
	"measure each decision by taking a dot product. essentially this just uses the average vote value among all voters. \n",
	"\n",
	"* an SVD result would change the reputation vector, but without SVD its a simple uniform vector (all votes equal)."
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# port of GetDecisionOutcomes() https://github.com/psztorc/Truthcoin/blob/master/lib/consensus/ConsensusMechanism.r#L139\n",
	"\n",
	"# VoterMatrix\n",
	"# we're using the regular matrix here. data is not even zero-centered.\n",
	"\n",
	"MaskedVoterMatrix = np.ma.masked_array(VoterMatrix, isnan(VoterMatrix))\n",
	"matrix_mask_thingie = -MaskedVoterMatrix[...,0].mask\n",
	"# not sure what the mask is for.\n",
	"# corresponds to https://github.com/psztorc/Truthcoin/blob/master/pylib/consensus/consensus.py#L113-L114\n",
	"row = reWeight( rep [ matrix_mask_thingie ] )\n",
	"print \"row:\", row\n",
	"col = MaskedVoterMatrix[matrix_mask_thingie, 0]\n",
	"print \"col:\", col\n",
	"\n",
	"\n",
	"decisions = []\n",
	"for i in range(VoterMatrix.shape[1]):\n",
	" row = reWeight( rep [ -MaskedVoterMatrix[...,i].mask ] )\n",
	" col = MaskedVoterMatrix[ -MaskedVoterMatrix[...,i].mask, i]\n",
	" col = np.array(col, dtype=float)\n",
	" row = np.transpose(row)[0]\n",
	" decisions.append(np.dot(col, row))\n",
	" \n",
	"print \"\\ndecisions:\"\n",
	"print decisions\n"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"row: [[ 0.2]\n",
	" [ 0.2]\n",
	" [ 0.2]\n",
	" [ 0.2]\n",
	" [ 0.2]]\n",
	"col: [[1 1 1 0 1]]\n",
	"\n",
	"decisions:\n",
	"[0.80000000000000004, 0.20000000000000001, 1.0, 1.0, 0.20000000000000001]\n"
	]
	}
	],
	"prompt_number": 11
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"* that's the output for a simplified multi-decision resolution from votes on binary outcomes.\n",
	" - map values between [0,1] to one of {0, 0.5, 1}\n",
	"* in this simplified version there is no reputation or vote stake amounts. every vote is equal ([0.2, 0.2, 0.2, 0.2, 0.2])\n",
	"\n",
	"* an extended method would incorporate vote stake/deposit amounts to weigh votes. if we also add scaled/continous outcomes then the resulting consensus method would be a form of multi-decision SchellingCoin (or equivalently, TruthCoin without reputation)."
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# SVD for Reputation based voting\n",
	"* SVD operates on a covariance matrix. covariance calc needs data matrix of normalized continuous values"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# normalize each feature/column to mean = 0, std = 1\n",
	"# data matrix needs to be normalized to get covariance and SVD\n",
	"normed = voteMatrix_pd.copy()\n",
	"for col in features:\n",
	" #normed[col] = normed[col].apply(lambda x: (x - normed[col].mean()) / normed[col].std())\n",
	" normed[col] = normed[col].apply(lambda x: (x - normed[col].mean()))\n",
	"\n",
	"# normed data matrix is only zero-centered (not auto-scaled)\n",
	"print '\\nNormalized dataset:'\n",
	"print normed[:5]\n"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"\n",
	"Normalized dataset:\n",
	" outcome_1 outcome_2 outcome_3 outcome_4 outcome_5\n",
	"0 0.2 -0.2 0 0 -0.2\n",
	"1 0.2 0.8 0 0 -0.2\n",
	"2 0.2 -0.2 0 0 -0.2\n",
	"3 -0.8 -0.2 0 0 -0.2\n",
	"4 0.2 -0.2 0 0 0.8\n",
	"\n",
	"[5 rows x 5 columns]\n"
	]
	}
	],
	"prompt_number": 23
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"this covariance calc is from the [original pca example](http://nbviewer.ipython.org/github/tmsquasher/data-science-notebooks/blob/master/PCA%20Basics.ipynb)"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"### since our data is already normalized, cov(x1, x2) = sum(x1*x2) / num_observations\n",
	"# ^^ old assumption from the original example. valid here??\n",
	"cov_df = pd.DataFrame(index=features)\n",
	"for colA in features:\n",
	" column = []\n",
	" for colB in features:\n",
	" cov = normed[colA].cov(normed[colB])\n",
	" column.append(cov)\n",
	" cov_df[colA] = column\n",
	"\n",
	"print 'Covariance matrix:'\n",
	"print cov_df\n",
	"# everybody agrees on outcomes 3 & 4 (tennis winner, mtgox solvency), so those columns have zero variance"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"Covariance matrix:\n",
	" outcome_1 outcome_2 outcome_3 outcome_4 outcome_5\n",
	"outcome_1 0.20 0.05 0 0 0.05\n",
	"outcome_2 0.05 0.20 0 0 -0.05\n",
	"outcome_3 0.00 0.00 0 0 0.00\n",
	"outcome_4 0.00 0.00 0 0 0.00\n",
	"outcome_5 0.05 -0.05 0 0 0.20\n",
	"\n",
	"[5 rows x 5 columns]\n"
	]
	}
	],
	"prompt_number": 24
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## 3. Singular Value Decomposition (SVD)\n",
	"\n",
	" [U]: Rows are the original features and columns are the PCA 'components'. Each cell gives the 'loading' of the feature on the corresponding component. \n",
	"\n",
	" [S]: Represents how much variance is explained by each component. "
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# use numpy's SVD implementation\n",
	"u, s, v = scipy.linalg.svd(cov_df)\n",
	"print 'U: (feature loading for each component)'\n",
	"print pd.DataFrame(u, index=features) # first loading\n",
	"print '\\nExplained variance:\\n', s\n",
	"\n",
	"firstScore = np.transpose(np.dot(cov_df, u))[0]\n",
	"print \"\\nfirstScore:\"\n",
	"print firstScore\n",
	"\n",
	"Set1 = firstScore + abs(min(firstScore))\n",
	"print \"\\nSet1:\"\n",
	"print Set1\n",
	"Set2 = firstScore - max(firstScore)\n",
	"print \"Set2:\"\n",
	"print Set2"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"U: (feature loading for each component)\n",
	" 0 1 2 3 4\n",
	"outcome_1 -0.816497 -8.088567e-17 0.57735 0 0\n",
	"outcome_2 -0.408248 -7.071068e-01 -0.57735 0 0\n",
	"outcome_3 0.000000 0.000000e+00 0.00000 1 0\n",
	"outcome_4 0.000000 0.000000e+00 0.00000 0 1\n",
	"outcome_5 -0.408248 7.071068e-01 -0.57735 0 0\n",
	"\n",
	"[5 rows x 5 columns]\n",
	"\n",
	"Explained variance:\n",
	"[ 0.25 0.25 0.1 0. 0. ]\n",
	"\n",
	"firstScore:\n",
	"[-0.20412415 -0.10206207 0. 0. -0.10206207]\n",
	"\n",
	"Set1:\n",
	"[ 0. 0.10206207 0.20412415 0.20412415 0.10206207]\n",
	"Set2:\n",
	"[-0.20412415 -0.10206207 0. 0. -0.10206207]\n"
	]
	}
	],
	"prompt_number": 8
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# note on these two sets: https://github.com/psztorc/Truthcoin/blob/master/lib/consensus/ConsensusMechanism.r#L40-L51\n",
	"New1 = getWeight(np.dot(Set1, voteMatrix_pd))\n",
	"print \"\\nNew1:\"\n",
	"print New1\n",
	"New2 = getWeight(np.dot(Set2, voteMatrix_pd))\n",
	"print \"New2:\"\n",
	"print New2\n"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"\n",
	"New1:\n",
	"[ 0.22222222 0.05555556 0.33333333 0.33333333 0.05555556]\n",
	"New2:\n",
	"[ 0.28571429 0.07142857 0.28571429 0.28571429 0.07142857]\n"
	]
	}
	],
	"prompt_number": 10
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### incomplete. more calcs follow for adjusting voter reputations."
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [],
	"language": "python",
	"metadata": {},
	"outputs": []
	}
	],
	"metadata": {}
	}
	]
	}