{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Q Learning with OpenAI Gym - FrozenLake.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/analyticsindiamagazine/c544f66085c0863428ea5c223adbe545/q-learning-with-openai-gym-frozenlake.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "54AIvDov_7aa",
"colab_type": "text"
},
"source": [
"## Step -1: Install the dependencies on Google Colab"
]
},
{
"cell_type": "code",
"metadata": {
"id": "gxxpHDIs_lvg",
"colab_type": "code",
"outputId": "20dae3b3-c6c2-4e6b-9144-2ab872a0e6bb",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
}
},
"source": [
"!pip install numpy\n",
"!pip install openai-gym"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (1.18.2)\n",
"\u001b[31mERROR: Could not find a version that satisfies the requirement openai-gym (from versions: none)\u001b[0m\n",
"\u001b[31mERROR: No matching distribution found for openai-gym\u001b[0m\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "oU8zRXv8QHlm",
"colab_type": "code",
"colab": {}
},
"source": [
"#import the required libraries.\n",
"import numpy as np\n",
"import gym\n",
"import random"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "mh9jBR_cQ5_a",
"colab_type": "code",
"colab": {}
},
"source": [
"#create the environment usign OpenAI Gym\n",
"env = gym.make(\"FrozenLake-v0\")"
],
"execution_count": 0,
"outputs": []
},
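{
"cell_type": "markdown",
"metadata": {},
"source": [
"FrozenLake-v0 is a 4x4 grid world: `S` is the start tile, `F` is frozen (walkable) ice, `H` is a hole that ends the episode, and `G` is the goal. The agent receives a reward of 1 only for reaching the goal and 0 otherwise, and by default the ice is slippery, so an action does not always move the agent in the intended direction. As a quick sanity check, the sketch below renders the initial board; it assumes the classic Gym v0 text-rendering API used throughout this notebook:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Render the initial board layout as text (classic Gym v0 API)\n",
"env.reset()\n",
"env.render()"
],
"execution_count": 0,
"outputs": []
},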
{
"cell_type": "markdown",
"metadata": {
"id": "JEtXMldxQ7uw",
"colab_type": "text"
},
"source": [
"## Step 2: Create the Q-table and initialize it"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Uc0xDVd_Q-C8",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "3abe6456-6584-4ea6-d948-24f3e16b10a7"
},
"source": [
"action_size = env.action_space.n\n",
"state_size = env.observation_space.n\n",
"\n",
"print(f\"Action Space : {action_size} | State Space: {state_size}\")"
],
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"text": [
"Action Space : 4 | State Space: 16\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "0J_GfR-p25bq",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "a6e8f9d7-9363-41e3-eee4-e7668c4f115a"
},
"source": [
"qtable = np.zeros((state_size, action_size))\n",
"print(qtable.shape)"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"text": [
"(16, 4)\n"
],
"name": "stdout"
}
]
},
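{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each row of the Q-table corresponds to one of the 16 states (grid cells) and each column to one of the 4 actions (left, down, right, up in FrozenLake-v0). Initializing it to zeros means the agent starts with no preference between actions; the values get filled in during training."
]
},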
{
"cell_type": "markdown",
"metadata": {
"id": "9DbAR9J_3DXa",
"colab_type": "text"
},
"source": [
"## Step 3: Create Required Hyperparameters"
]
},
{
"cell_type": "code",
"metadata": {
"id": "dBHB8MIl71Aw",
"colab_type": "code",
"colab": {}
},
"source": [
"total_episodes = 15000 # Total episodes\n",
"learning_rate = 0.8 # Learning rate\n",
"max_steps = 99 # Max steps per episode\n",
"gamma = 0.95 # Discounting rate\n",
"\n",
"# Exploration parameters\n",
"epsilon = 1.0 # Exploration rate\n",
"max_epsilon = 1.0 # Exploration probability at start\n",
"min_epsilon = 0.01 # Minimum exploration probability \n",
"decay_rate = 0.005 # Exponential decay rate for exploration prob"
],
"execution_count": 0,
"outputs": []
},
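{
"cell_type": "markdown",
"metadata": {},
"source": [
"These exploration parameters define an exponentially decaying epsilon-greedy schedule, applied at the end of every episode in Step 4:\n",
"\n",
"$$\\epsilon = \\epsilon_{min} + (\\epsilon_{max} - \\epsilon_{min})\\, e^{-\\text{decay\\_rate} \\cdot \\text{episode}}$$\n",
"\n",
"To get a feel for how quickly exploration dies off, the standalone sketch below (not part of the training loop) prints epsilon at a few episode counts:"
]
},
{
"cell_type": "code",
"metadata": {},
"source": [
"# Preview the epsilon decay schedule at a few points during training\n",
"for ep in [0, 100, 500, 1000, 15000]:\n",
"    eps = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * ep)\n",
"    print(f\"episode {ep:>5}: epsilon = {eps:.4f}\")"
],
"execution_count": 0,
"outputs": []
},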
{
"cell_type": "markdown",
"metadata": {
"id": "xqu-5j9B7qmy",
"colab_type": "text"
},
"source": [
"## Step 4 : Q-Learning Algorithm"
]
},
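{
"cell_type": "markdown",
"metadata": {},
"source": [
"The loop below implements the standard tabular Q-learning update that the inline comments abbreviate:\n",
"\n",
"$$Q(s,a) \\leftarrow Q(s,a) + \\alpha \\left[ r + \\gamma \\max_{a'} Q(s',a') - Q(s,a) \\right]$$\n",
"\n",
"where $\\alpha$ is `learning_rate`, $\\gamma$ is `gamma`, $r$ is the observed reward, and $s'$ is the next state. Actions are chosen epsilon-greedily: with probability $\\epsilon$ a random action is taken (exploration); otherwise the agent takes the action with the highest Q value for the current state (exploitation)."
]
},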
{
"cell_type": "code",
"metadata": {
"id": "YJYnA88a3TmG",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 318
},
"outputId": "b7ac72cb-470a-4388-9b7a-dccbb47dd8d9"
},
"source": [
"# List of rewards\n",
"rewards = []\n",
"\n",
"#until learning is stopped\n",
"for episode in range(total_episodes):\n",
" # Reset the environment\n",
" state = env.reset()\n",
" step = 0\n",
" done = False\n",
" total_rewards = 0\n",
" \n",
" for step in range(max_steps):\n",
" #Choose an action a in the current world state (s)\n",
" exp_exp_tradeoff = random.uniform(0, 1)\n",
" \n",
" ## If this number > greater than epsilon --> exploitation (taking the biggest Q value for this state)\n",
" if exp_exp_tradeoff > epsilon:\n",
" action = np.argmax(qtable[state,:])\n",
"\n",
" # Else doing a random choice --> exploration\n",
" else:\n",
" action = env.action_space.sample()\n",
"\n",
" # Take the action (a) and observe the outcome state(s') and reward (r)\n",
" new_state, reward, done, info = env.step(action)\n",
"\n",
" # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n",
" # qtable[new_state,:] : all the actions we can take from new state\n",
" qtable[state, action] = qtable[state, action] + learning_rate * \\\n",
" (reward + gamma * np.max(qtable[new_state, :]) - qtable[state, action])\n",
" \n",
" total_rewards += reward\n",
" \n",
" # Our new state is state\n",
" state = new_state\n",
" \n",
" # If done (if we're dead) : finish episode\n",
" if done == True: \n",
" break\n",
" \n",
" # Reduce epsilon (because we need less and less exploration)\n",
" epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode) \n",
"\n",
" rewards.append(total_rewards)\n",
"\n",
"print (\"Score over time: \" + str(sum(rewards)/total_episodes))\n",
"print(qtable)"
],
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"text": [
"Score over time: 0.4732\n",
"[[1.05246797e-01 4.01872123e-02 1.35132136e-02 1.48787076e-02]\n",
" [1.82176762e-03 2.69568226e-03 1.09909860e-03 1.61059078e-01]\n",
" [3.17791561e-03 4.06560846e-03 1.30660194e-03 3.01424520e-02]\n",
" [4.94699849e-06 1.93331852e-04 1.20861469e-03 1.93745500e-02]\n",
" [1.10367502e-01 2.91911411e-03 1.00353953e-02 2.63693712e-02]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [1.35836129e-02 1.82090030e-05 2.64523836e-05 3.82205929e-05]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [1.48845326e-02 7.52926653e-03 6.93762419e-03 1.83790127e-01]\n",
" [3.19882114e-02 1.54967666e-01 4.70875674e-02 4.27540059e-02]\n",
" [1.93771328e-02 8.31870743e-03 1.75091024e-02 9.78052945e-04]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n",
" [5.55887924e-03 1.98347857e-02 7.45547501e-01 1.61949091e-02]\n",
" [2.49407977e-01 9.81179468e-01 1.92530281e-01 3.14899072e-01]\n",
" [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]\n"
],
"name": "stdout"
}
]
},
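{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5: Play FrozenLake with the Learned Q-table\n",
"\n",
"Since the reward is 1 only when the agent reaches the goal, the score over time is simply the fraction of training episodes that ended at the goal (about 47% in the run above). The cell below plays 5 episodes greedily, always picking the action with the highest Q value, and renders the final board of each episode."
]
},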
{
"cell_type": "code",
"metadata": {
"id": "gQvoFSsr3TkM",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 727
},
"outputId": "39063780-bc9e-43ed-c292-82d162d9302b"
},
"source": [
"for episode in range(5):\n",
" state = env.reset()\n",
" step = 0\n",
" done = False\n",
" print(\"****************************************************\")\n",
" print(\"EPISODE \", episode)\n",
"\n",
" for step in range(max_steps):\n",
" \n",
" # Take the action (index) that have the maximum expected future reward given that state\n",
" action = np.argmax(qtable[state,:])\n",
" \n",
" new_state, reward, done, info = env.step(action)\n",
" \n",
" if done:\n",
" # Here, we decide to only print the last state (to see if our agent is on the goal or fall into an hole)\n",
" env.render()\n",
" \n",
" # We print the number of step it took.\n",
" print(\"Number of steps\", step)\n",
" break\n",
" state = new_state\n",
"env.close()"
],
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"text": [
"****************************************************\n",
"EPISODE 0\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 17\n",
"****************************************************\n",
"EPISODE 1\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 65\n",
"****************************************************\n",
"EPISODE 2\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 14\n",
"****************************************************\n",
"EPISODE 3\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 14\n",
"****************************************************\n",
"EPISODE 4\n",
" (Down)\n",
"SFFF\n",
"FHFH\n",
"FFFH\n",
"HFF\u001b[41mG\u001b[0m\n",
"Number of steps 46\n"
],
"name": "stdout"
}
]
}
]
}