0. FrozenLake-v0(Q-Table_detail)
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Q-Table Learning in FrozenLake(detail)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "from OpenAI Gym"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import packages\n",
    "import gym\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "env = gym.make('FrozenLake-v0')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "FrozenLake env review:\n",
    "\n",
    "* Structure : a 4 x 4 grid of blocks.\n",
    "* Block states : Start / Goal / Safe frozen surface / Dangerous hole.\n",
    "* Objective : have the agent learn to navigate from the Start block to the Goal block without stepping into a hole.\n",
    "* The catch : a wind occasionally blows the agent onto a block it did not choose."
   ]
  },
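  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check (a minimal sketch using only the standard gym Space attributes already used later in this notebook), we can inspect the environment's state and action spaces directly:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the spaces of the FrozenLake env\n",
    "print(env.observation_space)    # Discrete(16) : one state per block\n",
    "print(env.action_space)         # Discrete(4)  : one action per direction\n",
    "print(env.observation_space.n, env.action_space.n)"
   ]
  },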
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Q-Table Learning Algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the table with all zeros\n",
    "Q = np.zeros([env.observation_space.n, env.action_space.n])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Q"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The Q-Table of the FrozenLake env has shape 16 x 4:\n",
    "(one row per block : 16) x (actions : 4) // left, down, right or up"
   ]
  },
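  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To confirm that shape in code (a one-line check using the same attributes as above):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per state, one column per action\n",
    "assert Q.shape == (env.observation_space.n, env.action_space.n)\n",
    "Q.shape"
   ]
  },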
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\u001b[41mS\u001b[0mFFF\n",
      "FHFH\n",
      "FFFH\n",
      "HFFG\n",
      "------------------------\n",
      "  (Left)\n",
      "\u001b[41mS\u001b[0mFFF\n",
      "FHFH\n",
      "FFFH\n",
      "HFFG\n",
      "action: 0\n",
      "state : 0 \n",
      "info : 0.333\n",
      "------------------------\n",
      "  (Down)\n",
      "S\u001b[41mF\u001b[0mFF\n",
      "FHFH\n",
      "FFFH\n",
      "HFFG\n",
      "action: 1\n",
      "state : 1 \n",
      "info : 0.333\n",
      "------------------------\n",
      "  (Right)\n",
      "SFFF\n",
      "\u001b[41mF\u001b[0mHFH\n",
      "FFFH\n",
      "HFFG\n",
      "action: 2\n",
      "state : 4 \n",
      "info : 0.333\n",
      "------------------------\n",
      "  (Up)\n",
      "S\u001b[41mF\u001b[0mFF\n",
      "FHFH\n",
      "FFFH\n",
      "HFFG\n",
      "action: 3\n",
      "state : 1 \n",
      "info : 0.333\n",
      "------------------------\n"
     ]
    }
   ],
   "source": [
    "observation = env.reset()\n",
    "env.render()\n",
    "print('------------------------')\n",
    "for i in range(env.action_space.n):\n",
    "    action = i\n",
    "    observation, reward, done, info = env.step(action)\n",
    "    env.render()\n",
    "    print('action: %d' % action)\n",
    "    print('state : %d \\ninfo : %.3f' % (observation, info['prob']))\n",
    "    print('------------------------')\n",
    "    observation = env.reset()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "From the result above (gym prints the name of the last action taken), we can see:\n",
    "* action 0 : Left\n",
    "* action 1 : Down\n",
    "* action 2 : Right\n",
    "* action 3 : Up"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set learning parameters\n",
    "lr = .8   # learning rate\n",
    "y = .95   # discount factor\n",
    "num_episodes = 2000\n",
    "\n",
    "# create lists to contain total rewards and steps per episode\n",
    "rList = []   # reward list\n",
    "# sList = [] # state list"
   ]
  },
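  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For reference, the update applied inside the loop below is the standard tabular Q-learning rule, with `lr` as the learning rate $\\alpha$ and `y` as the discount factor $\\gamma$:\n",
    "\n",
    "$$Q(s,a) \\leftarrow Q(s,a) + \\alpha \\big( r + \\gamma \\max_{a'} Q(s',a') - Q(s,a) \\big)$$"
   ]
  },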
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "for i in range(num_episodes):\n",
    "    s = env.reset() # Reset environment and get first new observation\n",
    "    rAll = 0        # total reward in this episode\n",
    "    d = False       # end-of-episode flag\n",
    "    j = 0           # step counter\n",
    "#     print('------<initial state>-------')\n",
    "#     env.render()\n",
    "#     print('----------------------------')\n",
    "    # The Q-Table learning algorithm\n",
    "    while j < 100:\n",
    "        j += 1\n",
    "\n",
    "        # Choose an action greedily (with noise) from the Q-Table;\n",
    "        # the 1/(i+1) factor has the effect of cutting back on randomness over time\n",
    "        a = np.argmax(Q[s,:] + np.random.randn(1, env.action_space.n) * (1./(i+1)))\n",
    "\n",
    "        # Get the new state and reward for the action the agent took\n",
    "        s1, r, d, info = env.step(a)\n",
    "\n",
    "        # Update the Q-Table with the new knowledge (= reward)\n",
    "        Q[s,a] = Q[s,a] + lr*(r + y*np.max(Q[s1,:]) - Q[s,a])\n",
    "\n",
    "        rAll += r # accumulate reward\n",
    "        s = s1    # move to the next state\n",
    "\n",
    "        # Check some conditions in the console\n",
    "#         env.render()\n",
    "#         print('action: %d' % a)\n",
    "#         print('state : %d \\ninfo : %.3f' % (s, info['prob']))\n",
    "#         print('----------------------------')\n",
    "\n",
    "        # check for the end of the episode\n",
    "        if d == True:\n",
    "#             if rAll == 1:\n",
    "#                 print('Arrived at the goal state!\\n')\n",
    "#             else:\n",
    "#                 print('Fell into a hole. T.T\\n')\n",
    "            break\n",
    "    # jList.append(j)\n",
    "    rList.append(rAll)"
   ]
  },
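  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As an aside: a common alternative to the decaying-noise trick above is $\\epsilon$-greedy exploration. Below is a minimal sketch, not what the cell above runs; `choose_action` and the `eps` schedule are illustrative assumptions:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical epsilon-greedy selection, as an alternative to the noise trick\n",
    "def choose_action(Q, s, eps):\n",
    "    # explore uniformly with probability eps, otherwise exploit the Q-Table\n",
    "    if np.random.rand() < eps:\n",
    "        return env.action_space.sample()\n",
    "    return np.argmax(Q[s, :])\n",
    "\n",
    "# e.g. inside the episode loop: a = choose_action(Q, s, eps=1./(i+1))"
   ]
  },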
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Score over time: 0.585\n"
     ]
    }
   ],
   "source": [
    "print(\"Score over time: \" + str(sum(rList)/num_episodes))"
   ]
  },
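  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "About 58% of training episodes reached the goal, but that number still includes exploration noise. As a rough sketch, we can also evaluate the purely greedy policy on its own (`n_eval` is an arbitrary choice):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate the greedy policy, with no exploration noise\n",
    "n_eval = 1000\n",
    "wins = 0\n",
    "for _ in range(n_eval):\n",
    "    s = env.reset()\n",
    "    for _ in range(100): # same step cap as during training\n",
    "        s, r, d, _ = env.step(np.argmax(Q[s, :]))\n",
    "        if d:\n",
    "            break\n",
    "    wins += r # reward is 1 only when the goal is reached\n",
    "print('Greedy success rate: %.3f' % (wins / n_eval))"
   ]
  },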
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Final Q-Table Values\n",
      "[[ 0.103  0.008  0.006  0.006]\n",
      " [ 0.001  0.001  0.003  0.35 ]\n",
      " [ 0.003  0.003  0.008  0.104]\n",
      " [ 0.     0.     0.001  0.074]\n",
      " [ 0.17   0.     0.001  0.001]\n",
      " [ 0.     0.     0.     0.   ]\n",
      " [ 0.114  0.     0.     0.   ]\n",
      " [ 0.     0.     0.     0.   ]\n",
      " [ 0.001  0.     0.003  0.409]\n",
      " [ 0.002  0.683  0.     0.   ]\n",
      " [ 0.9    0.     0.     0.002]\n",
      " [ 0.     0.     0.     0.   ]\n",
      " [ 0.     0.     0.     0.   ]\n",
      " [ 0.     0.     0.945  0.001]\n",
      " [ 0.     0.995  0.     0.   ]\n",
      " [ 0.     0.     0.     0.   ]]\n"
     ]
    }
   ],
   "source": [
    "print(\"Final Q-Table Values\")\n",
    "print(np.round(Q, 3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's check in more detail.\n",
    "I'll take the index of the maximum Q-value in every row, then translate it into an action."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "At each state, the agent moves:\n",
      "['Left', 'Up', 'Up', 'Up', 'Left', 'hole or goal state', 'Left', 'hole or goal state', 'Up', 'Down', 'Left', 'hole or goal state', 'hole or goal state', 'Right', 'Down', 'hole or goal state']\n"
     ]
    }
   ],
   "source": [
    "action_at_state = []\n",
    "action_set = ['Left', 'Down', 'Right', 'Up']\n",
    "for i in range(len(Q)):\n",
    "    if np.sum(Q[i]) == 0: # all-zero row: never updated (hole or goal)\n",
    "        action_at_state.append('hole or goal state')\n",
    "    else:\n",
    "        idx = np.argmax(Q[i])\n",
    "        action_at_state.append(action_set[idx])\n",
    "print(\"At each state, the agent moves:\")\n",
    "print(action_at_state)"
   ]
  },
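  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, for readability, the same greedy actions can be laid out in the 4 x 4 grid of the lake (a small formatting sketch; the arrow symbols are just a display choice, and '.' marks the all-zero rows):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the greedy policy in the 4 x 4 layout of the lake\n",
    "arrows = {'Left': '<', 'Down': 'v', 'Right': '>', 'Up': '^', 'hole or goal state': '.'}\n",
    "grid = [arrows[a] for a in action_at_state]\n",
    "for row in range(4):\n",
    "    print(' '.join(grid[row*4:(row+1)*4]))"
   ]
  }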
 ],
 "metadata": {
  "anaconda-cloud": {},
  "gist_id": "b8c5cb850730ff671a5a63ac9c9f5991",
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}