arcarchit/gym_env.ipynb

## gym_env.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Open AI provides framework for creating environment and training on that environment. In this post I am pasting a simple notebook for a quick look up on how to use this environments and what all functions are available on environment object.\n",
    "\n",
    "I have used environment available on github by Denny Britz. https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/gridworld.py \n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import sys\n",
    "if \"../reinforcement-learning\" not in sys.path:\n",
    "  sys.path.append(\"../reinforcement-learning\") \n",
    "from lib.envs.gridworld import GridworldEnv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "env = GridworldEnv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Discrete(4)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.action_space"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "T  o  o  o\n",
      "o  o  o  o\n",
      "o  x  o  o\n",
      "o  o  o  T\n"
     ]
    }
   ],
   "source": [
    "env.render()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "16"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.nS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.nA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "observation, reward, done, info = env.step(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "observation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reward"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'prob': 1.0}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "T  o  o  o\n",
      "o  o  o  o\n",
      "o  o  x  o\n",
      "o  o  o  T\n"
     ]
    }
   ],
   "source": [
    "env.render()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.action_space.sample()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Discrete(16)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.observation_space"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(1.0, 2, -1.0, False)]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.P[1][1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: {0: [(1.0, 0, 0.0, True)],\n",
       "  1: [(1.0, 0, 0.0, True)],\n",
       "  2: [(1.0, 0, 0.0, True)],\n",
       "  3: [(1.0, 0, 0.0, True)]},\n",
       " 1: {0: [(1.0, 1, -1.0, False)],\n",
       "  1: [(1.0, 2, -1.0, False)],\n",
       "  2: [(1.0, 5, -1.0, False)],\n",
       "  3: [(1.0, 0, -1.0, True)]},\n",
       " 2: {0: [(1.0, 2, -1.0, False)],\n",
       "  1: [(1.0, 3, -1.0, False)],\n",
       "  2: [(1.0, 6, -1.0, False)],\n",
       "  3: [(1.0, 1, -1.0, False)]},\n",
       " 3: {0: [(1.0, 3, -1.0, False)],\n",
       "  1: [(1.0, 3, -1.0, False)],\n",
       "  2: [(1.0, 7, -1.0, False)],\n",
       "  3: [(1.0, 2, -1.0, False)]},\n",
       " 4: {0: [(1.0, 0, -1.0, True)],\n",
       "  1: [(1.0, 5, -1.0, False)],\n",
       "  2: [(1.0, 8, -1.0, False)],\n",
       "  3: [(1.0, 4, -1.0, False)]},\n",
       " 5: {0: [(1.0, 1, -1.0, False)],\n",
       "  1: [(1.0, 6, -1.0, False)],\n",
       "  2: [(1.0, 9, -1.0, False)],\n",
       "  3: [(1.0, 4, -1.0, False)]},\n",
       " 6: {0: [(1.0, 2, -1.0, False)],\n",
       "  1: [(1.0, 7, -1.0, False)],\n",
       "  2: [(1.0, 10, -1.0, False)],\n",
       "  3: [(1.0, 5, -1.0, False)]},\n",
       " 7: {0: [(1.0, 3, -1.0, False)],\n",
       "  1: [(1.0, 7, -1.0, False)],\n",
       "  2: [(1.0, 11, -1.0, False)],\n",
       "  3: [(1.0, 6, -1.0, False)]},\n",
       " 8: {0: [(1.0, 4, -1.0, False)],\n",
       "  1: [(1.0, 9, -1.0, False)],\n",
       "  2: [(1.0, 12, -1.0, False)],\n",
       "  3: [(1.0, 8, -1.0, False)]},\n",
       " 9: {0: [(1.0, 5, -1.0, False)],\n",
       "  1: [(1.0, 10, -1.0, False)],\n",
       "  2: [(1.0, 13, -1.0, False)],\n",
       "  3: [(1.0, 8, -1.0, False)]},\n",
       " 10: {0: [(1.0, 6, -1.0, False)],\n",
       "  1: [(1.0, 11, -1.0, False)],\n",
       "  2: [(1.0, 14, -1.0, False)],\n",
       "  3: [(1.0, 9, -1.0, False)]},\n",
       " 11: {0: [(1.0, 7, -1.0, False)],\n",
       "  1: [(1.0, 11, -1.0, False)],\n",
       "  2: [(1.0, 15, -1.0, True)],\n",
       "  3: [(1.0, 10, -1.0, False)]},\n",
       " 12: {0: [(1.0, 8, -1.0, False)],\n",
       "  1: [(1.0, 13, -1.0, False)],\n",
       "  2: [(1.0, 12, -1.0, False)],\n",
       "  3: [(1.0, 12, -1.0, False)]},\n",
       " 13: {0: [(1.0, 9, -1.0, False)],\n",
       "  1: [(1.0, 14, -1.0, False)],\n",
       "  2: [(1.0, 13, -1.0, False)],\n",
       "  3: [(1.0, 12, -1.0, False)]},\n",
       " 14: {0: [(1.0, 10, -1.0, False)],\n",
       "  1: [(1.0, 15, -1.0, True)],\n",
       "  2: [(1.0, 14, -1.0, False)],\n",
       "  3: [(1.0, 13, -1.0, False)]},\n",
       " 15: {0: [(1.0, 15, 0.0, True)],\n",
       "  1: [(1.0, 15, 0.0, True)],\n",
       "  2: [(1.0, 15, 0.0, True)],\n",
       "  3: [(1.0, 15, 0.0, True)]}}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.P"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.reset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "T  x  o  o\n",
      "o  o  o  o\n",
      "o  o  o  o\n",
      "o  o  o  T\n"
     ]
    }
   ],
   "source": [
    "env.render()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Open AI provides framework for creating environment and training on that environment. In this post I am pasting a simple notebook for a quick look up on how to use this environments and what all functions are available on environment object.\n",
	"\n",
	"I have used environment available on github by Denny Britz. https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/gridworld.py \n",
	" "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"import sys\n",
	"if \"../reinforcement-learning\" not in sys.path:\n",
	" sys.path.append(\"../reinforcement-learning\") \n",
	"from lib.envs.gridworld import GridworldEnv"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"env = GridworldEnv()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Discrete(4)"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"env.action_space"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"T o o o\n",
	"o o o o\n",
	"o x o o\n",
	"o o o T\n"
	]
	}
	],
	"source": [
	"env.render()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"16"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"env.nS"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"4"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"env.nA"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"observation, reward, done, info = env.step(1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"10"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"observation"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"-1.0"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"reward"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"False"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"done"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{'prob': 1.0}"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"info"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"T o o o\n",
	"o o o o\n",
	"o o x o\n",
	"o o o T\n"
	]
	}
	],
	"source": [
	"env.render()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"3"
	]
	},
	"execution_count": 21,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"env.action_space.sample()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Discrete(16)"
	]
	},
	"execution_count": 22,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"env.observation_space"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 27,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[(1.0, 2, -1.0, False)]"
	]
	},
	"execution_count": 27,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"env.P[1][1]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"{0: {0: [(1.0, 0, 0.0, True)],\n",
	" 1: [(1.0, 0, 0.0, True)],\n",
	" 2: [(1.0, 0, 0.0, True)],\n",
	" 3: [(1.0, 0, 0.0, True)]},\n",
	" 1: {0: [(1.0, 1, -1.0, False)],\n",
	" 1: [(1.0, 2, -1.0, False)],\n",
	" 2: [(1.0, 5, -1.0, False)],\n",
	" 3: [(1.0, 0, -1.0, True)]},\n",
	" 2: {0: [(1.0, 2, -1.0, False)],\n",
	" 1: [(1.0, 3, -1.0, False)],\n",
	" 2: [(1.0, 6, -1.0, False)],\n",
	" 3: [(1.0, 1, -1.0, False)]},\n",
	" 3: {0: [(1.0, 3, -1.0, False)],\n",
	" 1: [(1.0, 3, -1.0, False)],\n",
	" 2: [(1.0, 7, -1.0, False)],\n",
	" 3: [(1.0, 2, -1.0, False)]},\n",
	" 4: {0: [(1.0, 0, -1.0, True)],\n",
	" 1: [(1.0, 5, -1.0, False)],\n",
	" 2: [(1.0, 8, -1.0, False)],\n",
	" 3: [(1.0, 4, -1.0, False)]},\n",
	" 5: {0: [(1.0, 1, -1.0, False)],\n",
	" 1: [(1.0, 6, -1.0, False)],\n",
	" 2: [(1.0, 9, -1.0, False)],\n",
	" 3: [(1.0, 4, -1.0, False)]},\n",
	" 6: {0: [(1.0, 2, -1.0, False)],\n",
	" 1: [(1.0, 7, -1.0, False)],\n",
	" 2: [(1.0, 10, -1.0, False)],\n",
	" 3: [(1.0, 5, -1.0, False)]},\n",
	" 7: {0: [(1.0, 3, -1.0, False)],\n",
	" 1: [(1.0, 7, -1.0, False)],\n",
	" 2: [(1.0, 11, -1.0, False)],\n",
	" 3: [(1.0, 6, -1.0, False)]},\n",
	" 8: {0: [(1.0, 4, -1.0, False)],\n",
	" 1: [(1.0, 9, -1.0, False)],\n",
	" 2: [(1.0, 12, -1.0, False)],\n",
	" 3: [(1.0, 8, -1.0, False)]},\n",
	" 9: {0: [(1.0, 5, -1.0, False)],\n",
	" 1: [(1.0, 10, -1.0, False)],\n",
	" 2: [(1.0, 13, -1.0, False)],\n",
	" 3: [(1.0, 8, -1.0, False)]},\n",
	" 10: {0: [(1.0, 6, -1.0, False)],\n",
	" 1: [(1.0, 11, -1.0, False)],\n",
	" 2: [(1.0, 14, -1.0, False)],\n",
	" 3: [(1.0, 9, -1.0, False)]},\n",
	" 11: {0: [(1.0, 7, -1.0, False)],\n",
	" 1: [(1.0, 11, -1.0, False)],\n",
	" 2: [(1.0, 15, -1.0, True)],\n",
	" 3: [(1.0, 10, -1.0, False)]},\n",
	" 12: {0: [(1.0, 8, -1.0, False)],\n",
	" 1: [(1.0, 13, -1.0, False)],\n",
	" 2: [(1.0, 12, -1.0, False)],\n",
	" 3: [(1.0, 12, -1.0, False)]},\n",
	" 13: {0: [(1.0, 9, -1.0, False)],\n",
	" 1: [(1.0, 14, -1.0, False)],\n",
	" 2: [(1.0, 13, -1.0, False)],\n",
	" 3: [(1.0, 12, -1.0, False)]},\n",
	" 14: {0: [(1.0, 10, -1.0, False)],\n",
	" 1: [(1.0, 15, -1.0, True)],\n",
	" 2: [(1.0, 14, -1.0, False)],\n",
	" 3: [(1.0, 13, -1.0, False)]},\n",
	" 15: {0: [(1.0, 15, 0.0, True)],\n",
	" 1: [(1.0, 15, 0.0, True)],\n",
	" 2: [(1.0, 15, 0.0, True)],\n",
	" 3: [(1.0, 15, 0.0, True)]}}"
	]
	},
	"execution_count": 26,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"env.P"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1"
	]
	},
	"execution_count": 24,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"env.reset()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"T x o o\n",
	"o o o o\n",
	"o o o o\n",
	"o o o T\n"
	]
	}
	],
	"source": [
	"env.render()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.13"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}