Skip to content

Instantly share code, notes, and snippets.

@malzantot
malzantot / mountaincar_qlearning.py
Last active October 20, 2022 04:00
Solution of MountainCar OpenAI Gym problem using Q-Learning.
"""
Q-Learning example using OpenAI gym MountainCar environment
Author: Moustafa Alzantot (malzantot@ucla.edu)
"""
import numpy as np
import gym
from gym import wrappers
"""
Solving FrozenLake8x8 environment using Policy iteration.
Author : Moustafa Alzantot (malzantot@ucla.edu)
"""
import numpy as np
import gym
from gym import wrappers
def run_episode(env, policy, gamma = 1.0, render = False):
@malzantot
malzantot / frozenlake8x8_valueiteration.py
Created July 9, 2017 01:49
Solution of FrozenLake8x8 environment using Value Iteration.
"""
Solving FrozenLake8x8 environment using Value-Iteration.
Author : Moustafa Alzantot (malzantot@ucla.edu)
"""
import numpy as np
import gym
from gym import wrappers
"""
Q-Learning example using OpenAI gym MountainCar environment
Author: Moustafa Alzantot (malzantot@ucla.edu)
"""
import numpy as np
import gym
from gym import wrappers
n_states = 50
@malzantot
malzantot / frozenlake_genetic_algorithm.py
Created June 7, 2017 23:06
Solution of the FrozenLake problem using Genetic Algorithm
import numpy as np
import random
import time
import gym
from gym import wrappers
def run_episode(env, policy, episode_len=100):
total_reward = 0
obs = env.reset()
for t in range(episode_len):
import numpy as np
import time
import gym
def run_episode(env, policy, episode_len=100, render=False):
total_reward = 0
obs = env.reset()
for t in range(episode_len):
if render:
import gym
import numpy as np
def gen_random_policy(n_features=4):
    """Draw a random linear policy as a ``(weights, bias)`` pair.

    Args:
        n_features: Length of the weight vector. Defaults to 4, the value
            that was previously hard-coded, so existing zero-argument
            callers are unaffected.

    Returns:
        A tuple ``(weights, bias)`` where ``weights`` is a NumPy array of
        ``n_features`` samples and ``bias`` is a single float, each drawn
        uniformly between -1 and 1.
    """
    # Draw the weights before the bias so that, under a fixed seed, the
    # sampled values match the original single-expression version.
    weights = np.random.uniform(-1, 1, size=n_features)
    bias = np.random.uniform(-1, 1)
    return (weights, bias)
def policy_to_action(env, policy, obs):
if np.dot(policy[0], obs) + policy[1] > 0:
return 1
else:
@malzantot
malzantot / openai_gym101.py
Last active April 26, 2017 23:23
openai_gym101
import gym
env = gym.make('CartPole-v0')
# Restart the environment to start a new episode
obs = env.reset()
for step_idx in range(500):
env.render()
obs, reward, done, _ = env.step(env.action_space.sample())