Pierre Aumjaud (PierreExeter)

import gym
from gym import error, spaces, utils
from gym.utils import seeding


# Custom Gym environment skeleton for a Tic-Tac-Toe game
class TicTacEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        # board state, filled in as the game progresses
        self.state = []
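
    # Hedged sketch (assumption, not in the original snippet): the remaining
    # methods required by the gym.Env interface, left as minimal stubs.
    def step(self, action):
        observation, reward, done, info = self.state, 0.0, False, {}
        return observation, reward, done, info

    def reset(self):
        self.state = []
        return self.state

    def render(self, mode='human'):
        print(self.state)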
import gym
import pybulletgym

env = gym.make('ReacherPyBulletEnv-v0')
# for pybullet-gym, render() is called before the first reset() to open the GUI
env.render(mode="human")

for episode in range(20):
    state = env.reset()
    rewards = []
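    # Hedged sketch (assumption, not in the original snippet): step the
    # environment with random actions until the episode terminates.
    done = False
    while not done:
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        rewards.append(reward)
    print("Episode {}: total reward = {}".format(episode, sum(rewards)))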
import gym
import pybullet_envs
import time

env = gym.make("CartPoleBulletEnv-v1")
env.reset()

while True:
    action = [1, 0]
    state, reward, done, info = env.step(action)
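    # Hedged sketch (assumption, not in the original snippet): slow the loop
    # down to roughly real time and restart the episode once it terminates.
    time.sleep(1. / 60.)
    if done:
        env.reset()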
import pybullet as p
import pybullet_data
import time
# start pybullet simulation
p.connect(p.GUI)
# reset the simulation to its original state
p.resetSimulation()
import pybullet as p
import time
import pybullet_data

# Start pybullet simulation
p.connect(p.GUI)
# p.connect(p.DIRECT)  # run headless (don't render)
# add the pybullet_data directory to the URDF search path
p.setAdditionalSearchPath(pybullet_data.getDataPath())
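
# Hedged sketch (assumption, not in the original snippet): a typical
# continuation that sets gravity, loads a ground plane and a sample robot
# from pybullet_data, then steps the simulation in approximately real time.
p.setGravity(0, 0, -9.81)
planeId = p.loadURDF("plane.urdf")
robotId = p.loadURDF("r2d2.urdf", [0, 0, 1])

for i in range(10000):
    p.stepSimulation()
    time.sleep(1. / 240.)

p.disconnect()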
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

# Neural network approximating the Q-function: state in, one Q-value per action out
model = Sequential()
model.add(Dense(24, input_dim=n_states, activation='tanh'))
model.add(Dense(48, activation='tanh'))
model.add(Dense(n_actions, activation='linear'))
model.compile(loss='mse', optimizer=Adam(lr=alpha, decay=alpha_decay))
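
# Hedged sketch (assumption, not in the original snippet): epsilon-greedy
# action selection using this network, a typical next step in a DQN-style
# agent. Assumes numpy is imported as np and env is the Gym environment
# defined elsewhere in the gist.
def choose_action(state, epsilon):
    # state is expected to be a batch of shape (1, n_states)
    if np.random.random() <= epsilon:
        return env.action_space.sample()
    return np.argmax(model.predict(state)[0])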
# TRAINING PHASE
rewards = []

for episode in range(n_episodes):
    current_state = env.reset()
    current_state = discretize(current_state)
    alpha = get_alpha(episode)
    epsilon = get_epsilon(episode)
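    # Hedged sketch (assumption, not in the original snippet): the inner step
    # loop with epsilon-greedy exploration and the tabular Q-learning update,
    # using the Q table, discretize() and hyperparameters defined elsewhere
    # in the gist.
    episode_reward = 0
    for step in range(n_steps):
        if np.random.random() <= epsilon:
            action = env.action_space.sample()    # explore
        else:
            action = np.argmax(Q[current_state])  # exploit
        new_state, reward, done, info = env.step(action)
        new_state = discretize(new_state)
        # Q-learning update rule
        Q[current_state][action] += alpha * (reward + gamma * np.max(Q[new_state]) - Q[current_state][action])
        current_state = new_state
        episode_reward += reward
        if done:
            break
    rewards.append(episode_reward)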
# HYPERPARAMETERS
n_episodes = 1000   # total training episodes
n_steps = 200       # max steps per episode
min_alpha = 0.1     # minimum learning rate (floor of the alpha decay schedule)
min_epsilon = 0.1   # minimum exploration rate (floor of the epsilon decay schedule)
gamma = 1           # discount factor
ada_divisor = 25    # decay rate parameter for alpha and epsilon
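
# Hedged sketch (assumption, not in the original snippet): one common way to
# implement the decaying learning and exploration rates implied by min_alpha,
# min_epsilon and ada_divisor (math is assumed to be imported).
def get_alpha(t):
    return max(min_alpha, min(1.0, 1.0 - math.log10((t + 1) / ada_divisor)))

def get_epsilon(t):
    return max(min_epsilon, min(1.0, 1.0 - math.log10((t + 1) / ada_divisor)))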
# INITIALISE Q MATRIX
Q = np.zeros(buckets + (n_actions,))
# define the number of buckets for each state value (x, x', theta, theta')
buckets = (1, 1, 6, 12)

# define upper and lower bounds for each state value
upper_bounds = [
    env.observation_space.high[0],
    0.5,
    env.observation_space.high[2],
    math.radians(50)
]
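
# Hedged sketch (assumption, not in the original snippet): the matching lower
# bounds and a discretize() helper mapping a continuous observation to a tuple
# of bucket indices that can be used to index the Q table.
lower_bounds = [
    env.observation_space.low[0],
    -0.5,
    env.observation_space.low[2],
    -math.radians(50)
]

def discretize(obs):
    ratios = [(obs[i] - lower_bounds[i]) / (upper_bounds[i] - lower_bounds[i]) for i in range(len(obs))]
    new_obs = [int(round((buckets[i] - 1) * ratios[i])) for i in range(len(obs))]
    # clip each index so it stays within its bucket range
    new_obs = [min(buckets[i] - 1, max(0, new_obs[i])) for i in range(len(obs))]
    return tuple(new_obs)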
import gym
import numpy as np
import math
import matplotlib.pyplot as plt
# CREATE ENVIRONMENT
env = gym.make('CartPole-v0')
n_actions = env.action_space.n
n_states = env.observation_space.shape[0]
print("Action space size: ", n_actions)