Skip to content

Instantly share code, notes, and snippets.

@arcarchit
Last active May 7, 2020 10:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arcarchit/2b3363e2615df7ef5c8d4941d4dfa9e8 to your computer and use it in GitHub Desktop.
Save arcarchit/2b3363e2615df7ef5c8d4941d4dfa9e8 to your computer and use it in GitHub Desktop.
Open AI gym env object
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Open AI provides framework for creating environment and training on that environment. In this post I am pasting a simple notebook for a quick look up on how to use this environments and what all functions are available on environment object.\n",
"\n",
"I have used environment available on github by Denny Britz. https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/gridworld.py \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import sys\n",
"if \"../reinforcement-learning\" not in sys.path:\n",
" sys.path.append(\"../reinforcement-learning\") \n",
"from lib.envs.gridworld import GridworldEnv"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"env = GridworldEnv()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Discrete(4)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.action_space"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T o o o\n",
"o o o o\n",
"o x o o\n",
"o o o T\n"
]
}
],
"source": [
"env.render()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"16"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.nS"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.nA"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"observation, reward, done, info = env.step(1)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"observation"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1.0"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reward"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"done"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'prob': 1.0}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"info"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T o o o\n",
"o o o o\n",
"o o x o\n",
"o o o T\n"
]
}
],
"source": [
"env.render()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.action_space.sample()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Discrete(16)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.observation_space"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(1.0, 2, -1.0, False)]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.P[1][1]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0: {0: [(1.0, 0, 0.0, True)],\n",
" 1: [(1.0, 0, 0.0, True)],\n",
" 2: [(1.0, 0, 0.0, True)],\n",
" 3: [(1.0, 0, 0.0, True)]},\n",
" 1: {0: [(1.0, 1, -1.0, False)],\n",
" 1: [(1.0, 2, -1.0, False)],\n",
" 2: [(1.0, 5, -1.0, False)],\n",
" 3: [(1.0, 0, -1.0, True)]},\n",
" 2: {0: [(1.0, 2, -1.0, False)],\n",
" 1: [(1.0, 3, -1.0, False)],\n",
" 2: [(1.0, 6, -1.0, False)],\n",
" 3: [(1.0, 1, -1.0, False)]},\n",
" 3: {0: [(1.0, 3, -1.0, False)],\n",
" 1: [(1.0, 3, -1.0, False)],\n",
" 2: [(1.0, 7, -1.0, False)],\n",
" 3: [(1.0, 2, -1.0, False)]},\n",
" 4: {0: [(1.0, 0, -1.0, True)],\n",
" 1: [(1.0, 5, -1.0, False)],\n",
" 2: [(1.0, 8, -1.0, False)],\n",
" 3: [(1.0, 4, -1.0, False)]},\n",
" 5: {0: [(1.0, 1, -1.0, False)],\n",
" 1: [(1.0, 6, -1.0, False)],\n",
" 2: [(1.0, 9, -1.0, False)],\n",
" 3: [(1.0, 4, -1.0, False)]},\n",
" 6: {0: [(1.0, 2, -1.0, False)],\n",
" 1: [(1.0, 7, -1.0, False)],\n",
" 2: [(1.0, 10, -1.0, False)],\n",
" 3: [(1.0, 5, -1.0, False)]},\n",
" 7: {0: [(1.0, 3, -1.0, False)],\n",
" 1: [(1.0, 7, -1.0, False)],\n",
" 2: [(1.0, 11, -1.0, False)],\n",
" 3: [(1.0, 6, -1.0, False)]},\n",
" 8: {0: [(1.0, 4, -1.0, False)],\n",
" 1: [(1.0, 9, -1.0, False)],\n",
" 2: [(1.0, 12, -1.0, False)],\n",
" 3: [(1.0, 8, -1.0, False)]},\n",
" 9: {0: [(1.0, 5, -1.0, False)],\n",
" 1: [(1.0, 10, -1.0, False)],\n",
" 2: [(1.0, 13, -1.0, False)],\n",
" 3: [(1.0, 8, -1.0, False)]},\n",
" 10: {0: [(1.0, 6, -1.0, False)],\n",
" 1: [(1.0, 11, -1.0, False)],\n",
" 2: [(1.0, 14, -1.0, False)],\n",
" 3: [(1.0, 9, -1.0, False)]},\n",
" 11: {0: [(1.0, 7, -1.0, False)],\n",
" 1: [(1.0, 11, -1.0, False)],\n",
" 2: [(1.0, 15, -1.0, True)],\n",
" 3: [(1.0, 10, -1.0, False)]},\n",
" 12: {0: [(1.0, 8, -1.0, False)],\n",
" 1: [(1.0, 13, -1.0, False)],\n",
" 2: [(1.0, 12, -1.0, False)],\n",
" 3: [(1.0, 12, -1.0, False)]},\n",
" 13: {0: [(1.0, 9, -1.0, False)],\n",
" 1: [(1.0, 14, -1.0, False)],\n",
" 2: [(1.0, 13, -1.0, False)],\n",
" 3: [(1.0, 12, -1.0, False)]},\n",
" 14: {0: [(1.0, 10, -1.0, False)],\n",
" 1: [(1.0, 15, -1.0, True)],\n",
" 2: [(1.0, 14, -1.0, False)],\n",
" 3: [(1.0, 13, -1.0, False)]},\n",
" 15: {0: [(1.0, 15, 0.0, True)],\n",
" 1: [(1.0, 15, 0.0, True)],\n",
" 2: [(1.0, 15, 0.0, True)],\n",
" 3: [(1.0, 15, 0.0, True)]}}"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.P"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.reset()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T x o o\n",
"o o o o\n",
"o o o o\n",
"o o o T\n"
]
}
],
"source": [
"env.render()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment