sergeyk/gist:6897292

## gistfile1.txt
{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# `np` is used by every cell of this notebook but was never imported;\n",
      "# import it here so the notebook runs on a fresh kernel.\n",
      "import numpy as np\n",
      "\n",
      "# Payout matrix for the modified game from HW2 problem 3b.\n",
      "# One row per row-player strategy (4), one column per column-player\n",
      "# strategy (3).\n",
      "A = np.array([\n",
      "    [0, 1, -1],\n",
      "    [-1, 0, 1],\n",
      "    [1, -1, 0],\n",
      "    [0, -1, -1]\n",
      "])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 120
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Rescale the matrix so that every loss lies in [0, 1]:\n",
      "# shift the smallest entry to 0, then divide by the remaining spread.\n",
      "A -= A.min()\n",
      "spread = float(A.max() - A.min())\n",
      "A = A / spread\n",
      "A"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 121,
       "text": [
        "array([[ 0.5,  1. ,  0. ],\n",
        "       [ 0. ,  0.5,  1. ],\n",
        "       [ 1. ,  0. ,  0.5],\n",
        "       [ 0.5,  0. ,  0. ]])"
       ]
      }
     ],
     "prompt_number": 121
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# The row player starts from the uniform distribution:\n",
      "# one weight per row of A, all equal and summing to 1.\n",
      "num_rows = A.shape[0]\n",
      "weights = np.ones(num_rows) / float(num_rows)\n",
      "weights"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 122,
       "text": [
        "array([ 0.25,  0.25,  0.25,  0.25])"
       ]
      }
     ],
     "prompt_number": 122
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Multiplicative-weights simulation: the row player keeps a distribution\n",
      "# over the rows of A (the experts); every round the column player\n",
      "# best-responds to that distribution and the experts are re-weighted.\n",
      "epsilon = 0.1\n",
      "# 139 rounds. Presumably ln(n) / epsilon**2 for n = 4 experts -- TODO confirm.\n",
      "T = np.ceil(100 * np.log(4)).astype(int)\n",
      "\n",
      "# Always restart from uniform weights: `weights` is updated in place below,\n",
      "# so without this reset a re-run of the cell would silently continue from\n",
      "# the final weights of the previous run.\n",
      "weights = np.ones(A.shape[0]) / A.shape[0]\n",
      "\n",
      "weights_history = []\n",
      "y_history = []\n",
      "deltas = []\n",
      "deltas2 = []\n",
      "# Running sums of the strategies played so far (for the average strategies).\n",
      "x_sum = np.zeros(A.shape[0])\n",
      "y_sum = np.zeros(A.shape[1])\n",
      "for t in range(T + 1):\n",
      "    # Record current weights.\n",
      "    weights_history.append(weights.copy())\n",
      "\n",
      "    # Find the best y response to the current distribution (one-hot vector).\n",
      "    payoffs = np.dot(weights, A)\n",
      "    best_y = payoffs.argmax()\n",
      "    y = np.zeros(A.shape[1])\n",
      "    y[best_y] = 1\n",
      "    y_history.append(y)\n",
      "\n",
      "    # Loss of every expert (row) against that response: column best_y of A.\n",
      "    # This was previously read from an undefined variable.\n",
      "    losses = np.dot(A, y)\n",
      "\n",
      "    # x is the experts' distribution.\n",
      "    # C(x) is the value of the best (argmax) y response to the above x.\n",
      "    # R(y) is the value of the best (argmin) expert against the above y.\n",
      "    # NOTE(review): the original author was unsure whether this per-round gap\n",
      "    # or the average-strategy gap below is the intended quantity; both are kept.\n",
      "    Cx = payoffs.max()\n",
      "    Ry = losses.min()\n",
      "    deltas.append(Cx - Ry)\n",
      "\n",
      "    # Same gap for the average strategies up to and including round t:\n",
      "    # C(x*) is the best response value against x* = mean of the x's so far,\n",
      "    # R(y*) is the best expert value against y* = mean of the y's so far.\n",
      "    # Values (max / min) are compared, not indices (argmax / argmin).\n",
      "    x_sum += weights\n",
      "    y_sum += y\n",
      "    avg_x = x_sum / x_sum.sum()\n",
      "    avg_y = y_sum / y_sum.sum()\n",
      "    Cx_avg = np.dot(avg_x, A).max()\n",
      "    Ry_avg = np.dot(A, avg_y).min()\n",
      "    deltas2.append(Cx_avg - Ry_avg)\n",
      "\n",
      "    # Update weights: scale each expert by (1 - epsilon) ** loss, renormalize.\n",
      "    weights *= np.power(1 - epsilon, losses)\n",
      "    weights /= weights.sum()\n",
      "\n",
      "for t_ in [10, 100, T]:\n",
      "    print('t\\t\\t{}'.format(t_))\n",
      "    print('x\\t\\t{}'.format(weights_history[t_]))\n",
      "    print('y\\t\\t{}'.format(y_history[t_]))\n",
      "    print('Cx - Ry\\t\\t{}'.format(deltas[t_]))\n",
      "    print('C(x*) - R(y*)\\t{}'.format(deltas2[t_]))\n",
      "    # Blank separator line (a bare `print` does nothing under Python 3).\n",
      "    print('')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 129
    }
   ],
   "metadata": {}
  }
 ]
}
	{
	"metadata": {
	"name": ""
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# `np` is used by every cell of this notebook but was never imported;\n",
	"# import it here so the notebook runs on a fresh kernel.\n",
	"import numpy as np\n",
	"\n",
	"# Payout matrix for the modified game from HW2 problem 3b.\n",
	"# One row per row-player strategy (4), one column per column-player\n",
	"# strategy (3).\n",
	"A = np.array([\n",
	"    [0, 1, -1],\n",
	"    [-1, 0, 1],\n",
	"    [1, -1, 0],\n",
	"    [0, -1, -1]\n",
	"])"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 120
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# Rescale the matrix so that every loss lies in [0, 1]:\n",
	"# shift the smallest entry to 0, then divide by the remaining spread.\n",
	"A -= A.min()\n",
	"spread = float(A.max() - A.min())\n",
	"A = A / spread\n",
	"A"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"metadata": {},
	"output_type": "pyout",
	"prompt_number": 121,
	"text": [
	"array([[ 0.5, 1. , 0. ],\n",
	" [ 0. , 0.5, 1. ],\n",
	" [ 1. , 0. , 0.5],\n",
	" [ 0.5, 0. , 0. ]])"
	]
	}
	],
	"prompt_number": 121
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# The row player starts from the uniform distribution:\n",
	"# one weight per row of A, all equal and summing to 1.\n",
	"num_rows = A.shape[0]\n",
	"weights = np.ones(num_rows) / float(num_rows)\n",
	"weights"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"metadata": {},
	"output_type": "pyout",
	"prompt_number": 122,
	"text": [
	"array([ 0.25, 0.25, 0.25, 0.25])"
	]
	}
	],
	"prompt_number": 122
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"# Multiplicative-weights simulation: the row player keeps a distribution\n",
	"# over the rows of A (the experts); every round the column player\n",
	"# best-responds to that distribution and the experts are re-weighted.\n",
	"epsilon = 0.1\n",
	"# 139 rounds. Presumably ln(n) / epsilon**2 for n = 4 experts -- TODO confirm.\n",
	"T = np.ceil(100 * np.log(4)).astype(int)\n",
	"\n",
	"# Always restart from uniform weights: `weights` is updated in place below,\n",
	"# so without this reset a re-run of the cell would silently continue from\n",
	"# the final weights of the previous run.\n",
	"weights = np.ones(A.shape[0]) / A.shape[0]\n",
	"\n",
	"weights_history = []\n",
	"y_history = []\n",
	"deltas = []\n",
	"deltas2 = []\n",
	"# Running sums of the strategies played so far (for the average strategies).\n",
	"x_sum = np.zeros(A.shape[0])\n",
	"y_sum = np.zeros(A.shape[1])\n",
	"for t in range(T + 1):\n",
	"    # Record current weights.\n",
	"    weights_history.append(weights.copy())\n",
	"\n",
	"    # Find the best y response to the current distribution (one-hot vector).\n",
	"    payoffs = np.dot(weights, A)\n",
	"    best_y = payoffs.argmax()\n",
	"    y = np.zeros(A.shape[1])\n",
	"    y[best_y] = 1\n",
	"    y_history.append(y)\n",
	"\n",
	"    # Loss of every expert (row) against that response: column best_y of A.\n",
	"    # This was previously read from an undefined variable.\n",
	"    losses = np.dot(A, y)\n",
	"\n",
	"    # x is the experts' distribution.\n",
	"    # C(x) is the value of the best (argmax) y response to the above x.\n",
	"    # R(y) is the value of the best (argmin) expert against the above y.\n",
	"    # NOTE(review): the original author was unsure whether this per-round gap\n",
	"    # or the average-strategy gap below is the intended quantity; both are kept.\n",
	"    Cx = payoffs.max()\n",
	"    Ry = losses.min()\n",
	"    deltas.append(Cx - Ry)\n",
	"\n",
	"    # Same gap for the average strategies up to and including round t:\n",
	"    # C(x*) is the best response value against x* = mean of the x's so far,\n",
	"    # R(y*) is the best expert value against y* = mean of the y's so far.\n",
	"    # Values (max / min) are compared, not indices (argmax / argmin).\n",
	"    x_sum += weights\n",
	"    y_sum += y\n",
	"    avg_x = x_sum / x_sum.sum()\n",
	"    avg_y = y_sum / y_sum.sum()\n",
	"    Cx_avg = np.dot(avg_x, A).max()\n",
	"    Ry_avg = np.dot(A, avg_y).min()\n",
	"    deltas2.append(Cx_avg - Ry_avg)\n",
	"\n",
	"    # Update weights: scale each expert by (1 - epsilon) ** loss, renormalize.\n",
	"    weights *= np.power(1 - epsilon, losses)\n",
	"    weights /= weights.sum()\n",
	"\n",
	"for t_ in [10, 100, T]:\n",
	"    print('t\\t\\t{}'.format(t_))\n",
	"    print('x\\t\\t{}'.format(weights_history[t_]))\n",
	"    print('y\\t\\t{}'.format(y_history[t_]))\n",
	"    print('Cx - Ry\\t\\t{}'.format(deltas[t_]))\n",
	"    print('C(x*) - R(y*)\\t{}'.format(deltas2[t_]))\n",
	"    # Blank separator line (a bare `print` does nothing under Python 3).\n",
	"    print('')"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 129
	}
	],
	"metadata": {}
	}
	]
	}