Skip to content

Instantly share code, notes, and snippets.

@pilipolio
Last active February 19, 2017 00:53
Show Gist options
  • Save pilipolio/f551c334c99fb921e16855909269063b to your computer and use it in GitHub Desktop.
Save pilipolio/f551c334c99fb921e16855909269063b to your computer and use it in GitHub Desktop.
FrozenLake with Q tabular learning
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Frozen Lake\n",
"\n",
" * https://gym.openai.com/envs/FrozenLake-v0\n",
" * Inspired by https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-0-q-learning-with-tables-and-neural-networks-d195264329d0#.91x58km60\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import gym\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 232,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-02-19 00:08:00,024] Making new env: FrozenLake-v0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[41mS\u001b[0mFFF\n",
"FHFH\n",
"FFFH\n",
"HFFG\n",
"\n",
"0 0.0 False {'prob': 0.3333333333333333}\n",
"\u001b[41mS\u001b[0mFFF\n",
"FHFH\n",
"FFFH\n",
"HFFG\n",
" (Left)\n",
"4 0.0 False {'prob': 0.3333333333333333}\n",
"SFFF\n",
"\u001b[41mF\u001b[0mHFH\n",
"FFFH\n",
"HFFG\n",
" (Down)\n",
"0 0.0 False {'prob': 0.3333333333333333}\n",
"\u001b[41mS\u001b[0mFFF\n",
"FHFH\n",
"FFFH\n",
"HFFG\n",
" (Right)\n",
"0 0.0 False {'prob': 0.3333333333333333}\n",
"\u001b[41mS\u001b[0mFFF\n",
"FHFH\n",
"FFFH\n",
"HFFG\n",
" (Up)\n"
]
},
{
"data": {
"text/plain": [
"<ipykernel.iostream.OutStream at 0x10c5dc2e8>"
]
},
"execution_count": 232,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# slipperly lake, so actions not deterministic!\n",
"env = gym.make('FrozenLake-v0')\n",
"\n",
"env.render()\n",
"\n",
"next_s, r, is_end, inf = env.step(action=0)\n",
"print(next_s, r, is_end, inf)\n",
"env.render()\n",
"\n",
"next_s, r, is_end, inf = env.step(action=1)\n",
"print(next_s, r, is_end, inf)\n",
"env.render()\n",
"\n",
"next_s, r, is_end, inf = env.step(action=2)\n",
"print(next_s, r, is_end, inf)\n",
"env.render()\n",
"\n",
"next_s, r, is_end, inf = env.step(action=3)\n",
"print(next_s, r, is_end, inf)\n",
"env.render()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tabular Q learning\n",
"\n",
"See https://webdocs.cs.ualberta.ca/~sutton/book/ebook/node65.html\n",
"\n",
"![alt text](https://webdocs.cs.ualberta.ca/~sutton/book/ebook/numeqtmp31.png \"Q learning update rule\")\n"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 286,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import math\n",
"import random\n",
"\n",
"def one_step_update(Q, s, a, r, next_s, alpha, gamma):\n",
" Q[s, a] += alpha * (r + gamma * Q[next_s, :].max() - Q[s, a])\n",
" return Q\n",
"\n",
"def eps_greedy_action(Q, s, epsilon):\n",
" return random.choice(np.arange(Q.shape[1])) if (random.uniform(0, 1) <= epsilon) else Q[s, :].argmax()\n",
"\n",
"\n",
"class QLearning:\n",
" def __init__(self, Q, epsilon, alpha=0.1, gamma=.99):\n",
" self.Q = Q\n",
" self.epsilon = epsilon\n",
" self.alpha = alpha\n",
" self.gamma = gamma\n",
" self.n_episodes = 0\n",
" \n",
" def take_step(self, env, s):\n",
" # decayed and first\n",
" epsilon = (self.epsilon / (1 + self.n_episodes)) if (self.n_episodes <= 5000) else 0\n",
" a = eps_greedy_action(self.Q, s, epsilon)\n",
" next_s, r, is_end, _ = env.step(a)\n",
" self.Q = one_step_update(self.Q, s, a, r, next_s, self.alpha, self.gamma)\n",
" \n",
" if is_end:\n",
" self.n_episodes += 1\n",
"\n",
" return next_s, r, is_end\n",
" \n",
" @classmethod\n",
" def create(cls, epsilon=10, alpha=0.1, gamma=.99):\n",
" Q = np.zeros((env.observation_space.n, env.action_space.n))\n",
" return cls(Q, epsilon, alpha, gamma)\n",
"\n",
"def play_new_episode(env, act_func):\n",
" s = env.reset()\n",
" is_end = False\n",
" i = 0\n",
" while (not is_end) and i < 100:\n",
" i += 1\n",
" s, r, is_end = act_func(env, s)\n",
" yield r"
]
},
{
"cell_type": "code",
"execution_count": 253,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[41mS\u001b[0mFFF\n",
"FHFH\n",
"FFFH\n",
"HFFG\n",
" (Up)\n",
"\n",
"S\u001b[41mF\u001b[0mFF\n",
"FHFH\n",
"FFFH\n",
"HFFG\n",
" (Down)\n",
"\n",
"SFFF\n",
"F\u001b[41mH\u001b[0mFH\n",
"FFFH\n",
"HFFG\n",
" (Down)\n",
"\n"
]
}
],
"source": [
"def capture_new_episode(env, act_func):\n",
" rewards, debugs = zip(*[(r, env.render(mode='ansi')) for r in play_new_episode(env, act_func)])\n",
" return sum(rewards), '\\n'.join(d.getvalue() for d in debugs)\n",
"\n",
"total_reward, debug = capture_new_episode(env, QLearning.create().take_step)\n",
"\n",
"print(debug)"
]
},
{
"cell_type": "code",
"execution_count": 313,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.613\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAO4AAADuCAYAAAA+7jsiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADsBJREFUeJzt3VlsXNdhxvH/mZUc7qRGMheJkSzJ2ix5kdx4CWIJjg0v\nteMEDlKkUFu0SFo0KPrQGkWLPrR9aVM0LwVaIEWXlwJpEyAoUCRpCiOFE8V2JTeWYymRyNAiTWpI\nabhzOEPOcvpAaSRajiiKw3vuIb/fk4YzNj5y5s97h8PFWGsREb9EXA8QkdVTuCIeUrgiHlK4Ih5S\nuCIeUrgiHlK4Ih5SuCIeUrgiHoqt5sbR5gYbT7eu1xZnbHHjfv4yFdcL1ocpu16wPorTE5Tmc2al\n260q3Hi6ld6vfOnuV4XUQiblesK6ic9uzE9KsbkVH9teuvRPX72j223Me1Vkg1O4Ih5SuCIeUrgi\nHlK4Ih5SuCIeUrgiHlK4Ih5SuCIeUrgiHlK4Ih5SuCIeUrgB0++x9ktY7y+FGyBbqZB/9z3XM2QV\nCqPDFKcnXM+4hcIN0MKlIebePO16hqzCXN855vrOuZ5xC4UboPx75ylc6KOyuOh6ityhpXDPu55x\nC4UboPx757HFEoUL/a6nyB0ozkyxMDbC/FA/5YWC6znLKNyAFLMTFDNjwFLAEn5z/Uv3ky2XmX//\nouM1yyncgOTP3Yg1/95PsZUN+sugNpDr4S79O1zPcxVuQG4+ypZnZlgcHnG4RlZSKS4yf+nGUXau\nP1yfbBVuACr5AlQsyT33AtD05BMs9A04XiW3Mz/YT/OBh4g2NBFv6yDVu5tC5gPXs6oUbgBMMsHW\nL3+RROc9ALQ+9wxNxz/heJXcTsOufXS+8HkiiSTRVCPdL5+krnO761lVzsOtFEvk+4Zdz1hXJhLB\nGHPL2yS8Pur+CdN9tqrfq1wrpek5cv/XR+7MBXJnf077i49Rv6fHxRQRLwUSrrWWxaErzJ25QO7t\nixT6huGm7wGdffOn5N658dpmtLWR7ld/JYhpIl4K5ohbsZRn56nM5SnPzS+LFqAyX8AuFquXTSwa\nyKygtb38Aq0vPYeJx11PqanC8BAj//B3bHn+JVoeedT1nJra+Vt/ACZ8fzUhkHBNNELq0E5Sh3aS\n/rVnWLycXTr6nrlI/mdDtD77CO0vPRHEFKdMLEb4HgI1YC22WIQQvVxSK5F4wvWEj+TkOW6iawvt\nL26h/cXHKc/lKQxcdjFDxFvOv0wWbayn4fC9rmfIXfrwz6taa0P7M6wbiZMjrmwc49/9z+q/Cx8M\nkR98n22f+4LDRZuD8yOu+C2+Jc3UD/4HgNkfnyFSV3/La9ZSewpX1qThvgPLL+878AtuKbWkcGVN\nYs3NJHuWvhXQxBPU79rteNHmoHBlzRr2HQQgtXsPkQ32GnVYKVxZs+unxw37DzpesnkoXFmzRGcX\nsZZWUvftdz1l01C4smbGGNqfeoZYU7PrKZuGwpWaaHrwqOsJm4rClZoI08+qbgb6aIt4SOGKeEjh\ninhI4Yp4SOGKeEjhinhI4Yp4SOGKeEjhinhI4Yp4SOGKeEjhinhoVb/l0RhLIl5ary3O7Pi9t1xP\nWDfZL26svyxw3Zav/cj1hHUxYnN3dDsdcUU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRw\nRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBF\nPKRwRTwUinCtta4nrJuN+r5t5PfLh/dtVX/JoJbK+UVy7wwwc/oikXiMrt95ztWUmrLWMsMkWTJM\ncIUjPEaCpOtZNVHMTTM9eJ6ZwfO03nuE9r1HXU+qibItM8kVrpKhRJFDPOJ60ooCDXfx6jSzp/uY\nPX2R3E8uYYtlANqfPcr4t89Ubxepi9N24kiQ09akbEuMc4UsGbJkWKQAQDNtjDEMN30C30oXSVPv\naOnqWGvJZ4erseazw0tXRCI0du8he+5U9bbJtm00de12tHT1FmyeLKNc5TITXKHC0mOxk16GGaje\nZwZDj9nlcOlHCyRcW64w8d23ufqNH1KanLvl+onvnFl2OZ5u8SbcaTvORd5lmvFbrpthkhkml72t\nkWaShD/cUmGe0dPfYfziaWypuPzKSoXLb/zHsjd17Pu4F+Faa8kwyADnKTB/y/UZBskwWL0cIUoP\nmzRcE43Q8fwx2p89Sr7/MrOnLzJ7uo/C+2MAbP3V4zQe3nnj9vFQPPW+Iy2mg2Mcp2Dz14641z+D\nV2ilgz0cXnb7BpodLV2dWF2Knk98lq5HX2R2pI+ZwfPMDJ2nmJuGSIRdz/wm0eSNT0Cx+kaHa++c\nMYYuPkan7WWOaa5eu8+uf4LtZS9b6b75v3AzdAWBniqbiCG1t5vU3m62feH40qnzmT7K0/Ok7ute\n+X8QYnWmnh520cMuyrbEBFfIMko9DSRMnet5dy0Si9PSe4CW3gPXTp1HmBk8R6W0SPOOfa7n3TVj\nDE200kQru9jPgi2QJcM8szTTjjHhDPY6Z1+cAkikW+h4dmN8geNmURMjTRdpulxPqSljDKl0D6l0\nj+spNZc0dXSzc+UbhoQ/56QiUqVwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRw\nRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTykcEU8pHBFPKRwRTy0qt+rHItW\n2NKYW68tzrSdanc9Yd00lPtdT1gXua+5XuCWjrgiHlK4Ih5SuCIeUrgiHlK4Ih5SuCIeUrgiHlK4\nIh5SuCIeUrgiHlK4Ih5SuCIeUrgiHlK4Ih4KZbjlxZLrCSKhFspwr37/AvnRadczREIrlOFm3xhg\n/I0B1zNEQit04VaKZSbPXFK4IrcRunCnzg5TzheZOvsBpdyC6zkioRS6cMff/DkAtlRh8u1Bx2tE\nwilU4Vprl50iZ3W6LPKRQhXu/OA4hdGZ6uWJt97HlisOF4mEU6jCLYzNcOgvXiLakKTt4V56T36c\nfEYvC4l82Kp+r/J66/ilXQAYA5FElJ5PP+h4kUg4heqIK+FXnFtg5LV+xt4acj2lpqy1TNksQ7YP\na63rOSsK1RFXwik3Ms3oqUuMnhpk/GwGgCf/+RWKucXqbSKxCNGkXw+nki0yzhhZMmTJUGSRXRyg\nTAluajdm4u5G/gKh/Eg37+8k1dvhekbNzQ1Pk8/O03FoG5FYuE92FmcK9H/9LJnX32ducPKW679/\n8t+WXe795f088OqTAa27e9ZaRhkiwyCTXMWy/Og6wHkGOF+9HCHKCV4OeuaKQhnu4b/8jOsJ66Lv\n39+j/5vn+PR//zqRWML1nNtKNNex+/NHaPpYG2OnBhn73yFKc0tHWBON0Pvifowx1du333+Pq6mr\nYoxhm91OHSkaaSFLhnnmqtd3sI16GquXIyF9NhnKcCUcEs11bH96L9uf3kulVGb83VHGTl1i9EeD\npB/uoeuTu1xPvCsRE6GNNG2k2csRcna2erocJc4+E/4viipcuSORWJT0Q92kH+rm4Jcfqx59N4IG\n00QDTfSyl6JdxFq77GwijMJ5HrDBlPJFSvPFZW8rTOQdrVk7YwzxpqTrGesibhKhjxYUbiAisQjf\nO/lNxs6MAPDDP/wvhr7X53iV+EzhBiASj9J23xZmL00BkH0nQ+fjvY5Xic8UbkA6n7gRatOOFpq2\ntzhcI75TuAHpfHQHRJaeO90cscjdULgBSbbW0XFoKwBdOk2WNVK4Aep6rJd4U5KO+7e5niKe0+u4\nAep8YgfTAxOh/3ZHCT89ggLUvLON3a8ccj1DNgCFGyBjDB0Ht7qeIRuAwhXxkMIV8ZDCFfGQwhXx\nkMIV8ZDCFfGQwhXxkMIV8ZDCFfGQwhXxkMIV8ZDCFfGQwhXx0Kp+HtdcXCT21Mb6Y08Ar1zcuH9A\n+7ONMyvfyEPP8IDrCU7piCviIYUr4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr\n4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr4iGFK+IhhSviIYUr4iGFK+Ih5+Fa\na8nZWdcznMlcKlApW9czhOuPxRmsDf/9saq/ZFArZVtmkitcJUOWDPewnT0cdjHFubM/mOEbfzvK\ng082c/R4C4efaCbVFHU9a9Oo2DKTZMmS4SqXaSPNQXPM9awVBRbugs1f++BkmOAKFcrV66YZ5x17\nqno5QZID5mhQ0wL1r389wnB/oXq5uFBhdrLE69+a4PVvTRCNGw4ca+ThEy0cPdHC1u1Jh2s3pkW7\nQPbaQWOcMcqUqtflmFn2WIwQ4bB51MXM2wok3IqtMMU4U4wzzfiyaAFmmcZgqpeT1Acxy4nBn+Xp\neydXvfzhs7Jy0dJ/NkdDS5SG5iitW+Mkks6f0dzWj3+ywGd+I8OfvdrOyc81u55zW9Zapq89FqcY\nXxYtQI5Z5pmrXo4QzrOfQMKNmAjb6GEbPdUP3PXT5Bwz7OYQ283uIKY498f/uPz9PPPaFF/57QG2\n9iR4+EQLD59o4cCxRmKJcMd6s8WiZWikRG4+/M8NjTGk6SJNF9ZaZpisHn1nmWIHe7jXHHQ9c0WB\nP8c1xtDKFlrZwh7uZ97OMctU0DNCIxI1fPXb++neXYcxZuX/QGrGGEML7bTQzr0cpGDnmWIca23o\n7wsnX5y6Wco0kqLR9QxnHnqyxfUEuabOpLiHlOsZd8Sf8zEJpT//mwlee30egHMXFvndP7rixcsp\nvlO4sibpjih/+lcTAPz9v0yzWCT0p5kbgcKVNXn+qdRtL8v6ULiyJjt64hw5mAAgmTR86pMKNwgK\nV9bshU81AHD88XoaUnpIBUEfZVmz56+F+8LTDY6XbB4KV9bs2ANJtqWjvKDnt4FRuLJmkYjhT36/\nje3dcddTNg2FKzXxpZP6RpIgKVypiVhMr90GSeGKeEjhinhI4Yp4SOGKeEjhinhI4Yp4SOGKeEjh\ninhI4Yp4SOGKeEjhinhI4Yp4SOGKeMis5ldpGmOuAoPrN0dk0+u11qZXutGqwhWRcNCpsoiHFK6I\nhxSuiIcUroiHFK6IhxSuiIcUroiHFK6IhxSuiIf+HziD+LEmUlKyAAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x10fbff390>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"n_episodes = 10000\n",
"qlearn = QLearning.create(epsilon=100)\n",
"\n",
"simulated_episodes = (play_new_episode(env, qlearn.take_step) for _ in range(n_episodes))\n",
"average_reward = sum(sum(rs) for rs in simulated_episodes) / n_episodes\n",
"print(average_reward)\n",
"\n",
"from matplotlib import pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"def showQ(Q):\n",
" actions_grid = Q.argmax(1).reshape((4, 4))\n",
" values_grid = Q.max(1).reshape((4, 4))\n",
"\n",
" actions_xys = np.array([\n",
" [-1, 0], #L\n",
" [0, -1], #D\n",
" [1, 0], #R\n",
" [0, 1], #U\n",
" ])\n",
"\n",
" actions_x_grid = actions_xys[actions_grid, 0]\n",
" actions_y_grid = actions_xys[actions_grid, 1]\n",
"\n",
" plt.imshow(values_grid, interpolation='nearest')\n",
"\n",
" plt.quiver(actions_x_grid, actions_y_grid)\n",
"\n",
" plt.xticks([]); plt.yticks([]);\n",
"\n",
"showQ(qlearn.Q)"
]
},
{
"cell_type": "code",
"execution_count": 310,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.72\n"
]
}
],
"source": [
"n_episodes = 100\n",
"\n",
"simulated_episodes = (capture_new_episode(env, QLearning(qlearn.Q, epsilon=0).take_step) for _ in range(n_episodes))\n",
"\n",
"average_reward = sum(total_reward for total_reward, _ in simulated_episodes) / n_episodes\n",
"print(average_reward)"
]
},
{
"cell_type": "code",
"execution_count": 321,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SFFF\n",
"\u001b[41mF\u001b[0mHFH\n",
"FFFH\n",
"HFFG\n",
" (Left)\n",
"\n",
"SFFF\n",
"FHFH\n",
"\u001b[41mF\u001b[0mFFH\n",
"HFFG\n",
" (Left)\n",
"\n",
"SFFF\n",
"FHFH\n",
"F\u001b[41mF\u001b[0mFH\n",
"HFFG\n",
" (Up)\n",
"\n",
"SFFF\n",
"FHFH\n",
"FF\u001b[41mF\u001b[0mH\n",
"HFFG\n",
" (Down)\n",
"\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HF\u001b[41mF\u001b[0mG\n",
" (Left)\n",
"\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
" (Down)\n",
"\n"
]
}
],
"source": [
"simulated_episodes = (capture_new_episode(env, QLearning(qlearn.Q, epsilon=0.).take_step) for _ in range(n_episodes))\n",
"first_winning_debug = next(debug for total_reward, debug in simulated_episodes \n",
" if (total_reward > 0 and len(debug) < 300))\n",
"print(first_winning_debug)"
]
},
{
"cell_type": "code",
"execution_count": 311,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-02-19 00:46:50,628] Making new env: FrozenLake-v0\n",
"[2017-02-19 00:46:50,636] DEPRECATION WARNING: env.spec.timestep_limit has been deprecated. Replace your call to `env.spec.timestep_limit` with `env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps')`. This change was made 12/28/2016 and is included in version 0.7.0\n",
"[2017-02-19 00:46:50,638] Clearing 26 monitor files from previous run (because force=True was provided)\n",
"[2017-02-19 00:46:50,641] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000000.json\n",
"[2017-02-19 00:46:50,644] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000001.json\n",
"[2017-02-19 00:46:50,648] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000008.json\n",
"[2017-02-19 00:46:50,653] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000027.json\n",
"[2017-02-19 00:46:50,666] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000064.json\n",
"[2017-02-19 00:46:50,680] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000125.json\n",
"[2017-02-19 00:46:50,701] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000216.json\n",
"[2017-02-19 00:46:50,732] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000343.json\n",
"[2017-02-19 00:46:50,783] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000512.json\n",
"[2017-02-19 00:46:50,884] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video000729.json\n",
"[2017-02-19 00:46:51,043] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video001000.json\n",
"[2017-02-19 00:46:51,793] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video002000.json\n",
"[2017-02-19 00:46:52,640] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video003000.json\n",
"[2017-02-19 00:46:53,444] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video004000.json\n",
"[2017-02-19 00:46:54,319] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video005000.json\n",
"[2017-02-19 00:46:55,253] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video006000.json\n",
"[2017-02-19 00:46:56,296] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video007000.json\n",
"[2017-02-19 00:46:57,306] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video008000.json\n",
"[2017-02-19 00:46:58,304] Starting new video recorder writing to /tmp/FrozenLake-v0/openaigym.video.7.47835.video009000.json\n",
"[2017-02-19 00:46:59,449] Finished writing results. You can upload them to the scoreboard via gym.upload('/tmp/FrozenLake-v0')\n"
]
}
],
"source": [
"qlearn = QLearning.create(epsilon=100)\n",
"\n",
"import gym\n",
"from gym import wrappers\n",
"\n",
"env = wrappers.Monitor(gym.make('FrozenLake-v0'), '/tmp/FrozenLake-v0', force=True)\n",
"\n",
"for i_episode in range(10000):\n",
" r = sum(play_new_episode(env, qlearn.take_step))\n",
"\n",
"env.close()"
]
},
{
"cell_type": "code",
"execution_count": 312,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-02-19 00:47:00,333] [FrozenLake-v0] Uploading 10000 episodes of training data\n",
"[2017-02-19 00:47:12,078] [FrozenLake-v0] Uploading videos of 19 training episodes (2124 bytes)\n",
"[2017-02-19 00:47:12,595] [FrozenLake-v0] Creating evaluation object from /tmp/FrozenLake-v0 with learning curve and training video\n",
"[2017-02-19 00:47:12,926] \n",
"****************************************************\n",
"You successfully uploaded your evaluation on FrozenLake-v0 to\n",
"OpenAI Gym! You can find it at:\n",
"\n",
" https://gym.openai.com/evaluations/eval_sYk0FCES1iR4JPQJxfpmA\n",
"\n",
"****************************************************\n"
]
}
],
"source": [
"gym.upload('/tmp/FrozenLake-v0', api_key='sk_bNZUvCfkTfabQCoKoKbjFA')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment