Last active
May 7, 2020 10:20
-
-
Save arcarchit/2b3363e2615df7ef5c8d4941d4dfa9e8 to your computer and use it in GitHub Desktop.
Open AI gym env object
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"OpenAI Gym provides a framework for creating environments and training agents on them. In this post I am pasting a simple notebook as a quick reference on how to use these environments and which functions are available on the environment object.\n", | |
"\n", | |
"I have used the Gridworld environment by Denny Britz, available on GitHub: https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/gridworld.py \n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import sys\n", | |
"if \"../reinforcement-learning\" not in sys.path:\n", | |
" sys.path.append(\"../reinforcement-learning\") \n", | |
"from lib.envs.gridworld import GridworldEnv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"env = GridworldEnv()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Discrete(4)" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"env.action_space" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"T o o o\n", | |
"o o o o\n", | |
"o x o o\n", | |
"o o o T\n" | |
] | |
} | |
], | |
"source": [ | |
"env.render()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"16" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"env.nS" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"4" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"env.nA" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"observation, reward, done, info = env.step(1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"10" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"observation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"-1.0" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"reward" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"False" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"done" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'prob': 1.0}" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"info" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"T o o o\n", | |
"o o o o\n", | |
"o o x o\n", | |
"o o o T\n" | |
] | |
} | |
], | |
"source": [ | |
"env.render()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"env.action_space.sample()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Discrete(16)" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"env.observation_space" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[(1.0, 2, -1.0, False)]" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"env.P[1][1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{0: {0: [(1.0, 0, 0.0, True)],\n", | |
" 1: [(1.0, 0, 0.0, True)],\n", | |
" 2: [(1.0, 0, 0.0, True)],\n", | |
" 3: [(1.0, 0, 0.0, True)]},\n", | |
" 1: {0: [(1.0, 1, -1.0, False)],\n", | |
" 1: [(1.0, 2, -1.0, False)],\n", | |
" 2: [(1.0, 5, -1.0, False)],\n", | |
" 3: [(1.0, 0, -1.0, True)]},\n", | |
" 2: {0: [(1.0, 2, -1.0, False)],\n", | |
" 1: [(1.0, 3, -1.0, False)],\n", | |
" 2: [(1.0, 6, -1.0, False)],\n", | |
" 3: [(1.0, 1, -1.0, False)]},\n", | |
" 3: {0: [(1.0, 3, -1.0, False)],\n", | |
" 1: [(1.0, 3, -1.0, False)],\n", | |
" 2: [(1.0, 7, -1.0, False)],\n", | |
" 3: [(1.0, 2, -1.0, False)]},\n", | |
" 4: {0: [(1.0, 0, -1.0, True)],\n", | |
" 1: [(1.0, 5, -1.0, False)],\n", | |
" 2: [(1.0, 8, -1.0, False)],\n", | |
" 3: [(1.0, 4, -1.0, False)]},\n", | |
" 5: {0: [(1.0, 1, -1.0, False)],\n", | |
" 1: [(1.0, 6, -1.0, False)],\n", | |
" 2: [(1.0, 9, -1.0, False)],\n", | |
" 3: [(1.0, 4, -1.0, False)]},\n", | |
" 6: {0: [(1.0, 2, -1.0, False)],\n", | |
" 1: [(1.0, 7, -1.0, False)],\n", | |
" 2: [(1.0, 10, -1.0, False)],\n", | |
" 3: [(1.0, 5, -1.0, False)]},\n", | |
" 7: {0: [(1.0, 3, -1.0, False)],\n", | |
" 1: [(1.0, 7, -1.0, False)],\n", | |
" 2: [(1.0, 11, -1.0, False)],\n", | |
" 3: [(1.0, 6, -1.0, False)]},\n", | |
" 8: {0: [(1.0, 4, -1.0, False)],\n", | |
" 1: [(1.0, 9, -1.0, False)],\n", | |
" 2: [(1.0, 12, -1.0, False)],\n", | |
" 3: [(1.0, 8, -1.0, False)]},\n", | |
" 9: {0: [(1.0, 5, -1.0, False)],\n", | |
" 1: [(1.0, 10, -1.0, False)],\n", | |
" 2: [(1.0, 13, -1.0, False)],\n", | |
" 3: [(1.0, 8, -1.0, False)]},\n", | |
" 10: {0: [(1.0, 6, -1.0, False)],\n", | |
" 1: [(1.0, 11, -1.0, False)],\n", | |
" 2: [(1.0, 14, -1.0, False)],\n", | |
" 3: [(1.0, 9, -1.0, False)]},\n", | |
" 11: {0: [(1.0, 7, -1.0, False)],\n", | |
" 1: [(1.0, 11, -1.0, False)],\n", | |
" 2: [(1.0, 15, -1.0, True)],\n", | |
" 3: [(1.0, 10, -1.0, False)]},\n", | |
" 12: {0: [(1.0, 8, -1.0, False)],\n", | |
" 1: [(1.0, 13, -1.0, False)],\n", | |
" 2: [(1.0, 12, -1.0, False)],\n", | |
" 3: [(1.0, 12, -1.0, False)]},\n", | |
" 13: {0: [(1.0, 9, -1.0, False)],\n", | |
" 1: [(1.0, 14, -1.0, False)],\n", | |
" 2: [(1.0, 13, -1.0, False)],\n", | |
" 3: [(1.0, 12, -1.0, False)]},\n", | |
" 14: {0: [(1.0, 10, -1.0, False)],\n", | |
" 1: [(1.0, 15, -1.0, True)],\n", | |
" 2: [(1.0, 14, -1.0, False)],\n", | |
" 3: [(1.0, 13, -1.0, False)]},\n", | |
" 15: {0: [(1.0, 15, 0.0, True)],\n", | |
" 1: [(1.0, 15, 0.0, True)],\n", | |
" 2: [(1.0, 15, 0.0, True)],\n", | |
" 3: [(1.0, 15, 0.0, True)]}}" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"env.P" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"env.reset()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"T x o o\n", | |
"o o o o\n", | |
"o o o o\n", | |
"o o o T\n" | |
] | |
} | |
], | |
"source": [ | |
"env.render()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment