Rokas Liuberskis (pythonlessons)
pythonlessons / CartPole-reinforcement-learning_1.py
Last active October 14, 2019 10:34
random CartPole environment
import gym
import random

env = gym.make("CartPole-v0")
env.reset()

def Random_games():
    # Each episode is its own game.
    for episode in range(10):
        env.reset()
pythonlessons / 1_Cartpole_DQN_cartpole_random.py
Last active November 26, 2019 13:30
import gym
import random

env = gym.make("CartPole-v1")

def Random_games():
    # Each episode is its own game.
    for episode in range(10):
        env.reset()
        # Each step is one frame, up to 500... but we won't make it that far with random actions.
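The preview cuts off at the frame loop; a minimal sketch of the missing body (for this gist and the near-identical CartPole-v0 one above), assuming the classic gym API where env.step returns four values:

        while True:
            env.render()
            # sample a random action: 0 pushes the cart left, 1 pushes it right
            action = env.action_space.sample()
            next_state, reward, done, info = env.step(action)
            if done:
                break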
pythonlessons / 1_Cartpole_DQN_keras_model.py
Created November 26, 2019 14:49
from keras.models import Model
from keras.layers import Input, Dense
from keras.optimizers import Adam, RMSprop
# Neural network model for deep Q-learning
def OurModel(input_shape, action_space):
    X_input = Input(input_shape)

    # 'Dense' is the basic fully connected neural network layer
    # Input layer of state size (4) and a hidden layer with 512 nodes
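A plausible completion of the network, following the layer sizes the comments name; the 64-node layer, the choice of RMSprop, and its hyperparameters are assumptions, not confirmed by the preview:

    X = Dense(512, input_shape=input_shape, activation="relu", kernel_initializer='he_uniform')(X_input)

    # Hidden layers with 256 and 64 nodes
    X = Dense(256, activation="relu", kernel_initializer='he_uniform')(X)
    X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)

    # Output layer: one linear Q-value per action
    X = Dense(action_space, activation="linear", kernel_initializer='he_uniform')(X)

    model = Model(inputs=X_input, outputs=X, name='CartPole_DQN_model')
    model.compile(loss="mse", optimizer=RMSprop(lr=0.00025, rho=0.95, epsilon=0.01), metrics=["accuracy"])
    return model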
pythonlessons / 1_Cartpole_DQN_remember_fucntion.py
Created November 26, 2019 14:50
def remember(self, state, action, reward, next_state, done):
    self.memory.append((state, action, reward, next_state, done))
    # once enough samples are stored to start training, decay exploration
    if len(self.memory) > self.train_start:
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
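For this method to run, the agent needs a bounded replay buffer and the exploration fields it references; a minimal sketch of the assumed __init__ values (names taken from the snippet, the numbers are illustrative):

from collections import deque

# inside the agent's __init__:
self.memory = deque(maxlen=2000)  # replay buffer; the oldest transitions fall off
self.train_start = 1000           # begin training (and epsilon decay) after this many samples
self.epsilon = 1.0                # initial exploration rate
self.epsilon_min = 0.01           # exploration floor
self.epsilon_decay = 0.999        # multiplicative decay per stored transition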
pythonlessons / 1_Cartpole_DQN_replay_fucntion.py
Created November 26, 2019 14:51
def replay(self):
    if len(self.memory) < self.train_start:
        return

    # Randomly sample minibatch from the memory
    minibatch = random.sample(self.memory, min(len(self.memory), self.batch_size))

    state = np.zeros((self.batch_size, self.state_size))
    next_state = np.zeros((self.batch_size, self.state_size))
    action, reward, done = [], [], []
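The preview stops before the learning step; a sketch of the usual continuation (unpack the minibatch, compute Q-learning targets, fit the network), assuming a self.gamma discount factor and a Keras self.model:

    for i in range(self.batch_size):
        state[i] = minibatch[i][0]
        action.append(minibatch[i][1])
        reward.append(minibatch[i][2])
        next_state[i] = minibatch[i][3]
        done.append(minibatch[i][4])

    # predict Q-values for current and next states, one batch each
    target = self.model.predict(state)
    target_next = self.model.predict(next_state)

    for i in range(self.batch_size):
        if done[i]:
            # terminal transition: no future reward
            target[i][action[i]] = reward[i]
        else:
            # standard Q-learning target: r + gamma * max_a' Q(s', a')
            target[i][action[i]] = reward[i] + self.gamma * np.amax(target_next[i])

    self.model.fit(state, target, batch_size=self.batch_size, verbose=0)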
pythonlessons / 1_Cartpole_DQN_run_fucntion.py
Created November 26, 2019 14:53
def run(self):
    for e in range(self.EPISODES):
        state = self.env.reset()
        state = np.reshape(state, [1, self.state_size])
        done = False
        i = 0
        while not done:
            self.env.render()
            action = self.act(state)
            next_state, reward, done, _ = self.env.step(action)
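A sketch of how the episode loop typically continues (store the transition, advance the step counter, train); the -100 terminal-reward shaping and the print format are assumptions in this tutorial's style, not confirmed by the preview:

            next_state = np.reshape(next_state, [1, self.state_size])
            # penalize failing early so the agent learns to keep the pole up
            if done and i < self.env._max_episode_steps - 1:
                reward = -100
            self.remember(state, action, reward, next_state, done)
            state = next_state
            i += 1
            if done:
                print("episode: {}/{}, score: {}, e: {:.2}".format(e, self.EPISODES, i, self.epsilon))
            self.replay()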
pythonlessons / replay_function.py
Last active January 14, 2020 12:41
02_CartPole-reinforcement-learning_DDQN
def replay(self):
    if len(self.memory) < self.train_start:
        return

    # Randomly sample minibatch from the memory
    minibatch = random.sample(self.memory, min(len(self.memory), self.batch_size))

    state = np.zeros((self.batch_size, self.state_size))
    next_state = np.zeros((self.batch_size, self.state_size))
    action, reward, done = [], [], []
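The DDQN-specific part follows in the full gist; a sketch of the double-Q target computation, assuming a self.target_model alongside self.model and a self.ddqn flag:

    for i in range(self.batch_size):
        state[i] = minibatch[i][0]
        action.append(minibatch[i][1])
        reward.append(minibatch[i][2])
        next_state[i] = minibatch[i][3]
        done.append(minibatch[i][4])

    target = self.model.predict(state)
    target_next = self.model.predict(next_state)
    target_val = self.target_model.predict(next_state)

    for i in range(self.batch_size):
        if done[i]:
            target[i][action[i]] = reward[i]
        elif self.ddqn:
            # DDQN: the online network selects the action, the target network evaluates it
            a = np.argmax(target_next[i])
            target[i][action[i]] = reward[i] + self.gamma * target_val[i][a]
        else:
            # vanilla DQN target
            target[i][action[i]] = reward[i] + self.gamma * np.amax(target_next[i])

    self.model.fit(state, target, batch_size=self.batch_size, verbose=0)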
pythonlessons / update_target_model.py
Last active January 14, 2020 12:37
02_CartPole-reinforcement-learning_DDQN
def update_target_model(self):
    # hard update: copy the online network's weights into the target network
    if not self.Soft_Update and self.ddqn:
        self.target_model.set_weights(self.model.get_weights())
        return
    # soft update: blend the two weight sets (Polyak averaging)
    if self.Soft_Update and self.ddqn:
        q_model_theta = self.model.get_weights()
        target_model_theta = self.target_model.get_weights()
        counter = 0
        for q_weight, target_weight in zip(q_model_theta, target_model_theta):
            target_weight = target_weight * (1 - self.TAU) + q_weight * self.TAU
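As previewed, the blended weight is computed but never stored; the full gist presumably writes it back and applies the result to the target network, along these lines:

            target_model_theta[counter] = target_weight
            counter += 1
        self.target_model.set_weights(target_model_theta)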
pythonlessons / PlotModel.py
Created January 14, 2020 12:40
02_CartPole-reinforcement-learning_DDQN
pylab.figure(figsize=(18, 9))

def PlotModel(self, score, episode):
    self.scores.append(score)
    self.episodes.append(episode)
    self.average.append(sum(self.scores) / len(self.scores))
    pylab.plot(self.episodes, self.average, 'r')
    pylab.plot(self.episodes, self.scores, 'b')
    pylab.ylabel('Score', fontsize=18)
    pylab.xlabel('Episode', fontsize=18)
    dqn = 'DQN_'
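The dqn prefix suggests the function goes on to build a filename and save the figure; a sketch of that tail, where self.ddqn, self.Soft_Update, and self.env_name are assumed fields, not confirmed by the preview:

    softupdate = ''
    if self.ddqn:
        dqn = 'DDQN_'
    if self.Soft_Update:
        softupdate = '_soft'
    try:
        pylab.savefig(dqn + self.env_name + softupdate + ".png")
    except OSError:
        pass
    # return the running average, formatted for logging
    return str(self.average[-1])[:5]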
pythonlessons / OurModel.py
Created January 14, 2020 14:44
03_CartPole-reinforcement-learning_Dueling_DDQN
def OurModel(input_shape, action_space, dueling):
    X_input = Input(input_shape)
    X = X_input

    # 'Dense' is the basic fully connected neural network layer
    # Input layer of state size (4) and a hidden layer with 512 nodes
    X = Dense(512, input_shape=input_shape, activation="relu", kernel_initializer='he_uniform')(X)

    # Hidden layer with 256 nodes
    X = Dense(256, activation="relu", kernel_initializer='he_uniform')(X)
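The dueling branch is cut off by the preview; a sketch of a standard dueling head (state value plus mean-centered advantages), where the 64-node layer, the compile settings, and the extra imports (from keras.layers import Lambda; from keras import backend as K) are assumptions:

    X = Dense(64, activation="relu", kernel_initializer='he_uniform')(X)

    if dueling:
        # two streams: a scalar state value V(s) and per-action advantages A(s, a)
        state_value = Dense(1, kernel_initializer='he_uniform')(X)
        action_advantage = Dense(action_space, kernel_initializer='he_uniform')(X)
        # Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)); broadcasting expands V across actions
        X = Lambda(lambda w: w[0] + (w[1] - K.mean(w[1], axis=1, keepdims=True)),
                   output_shape=(action_space,))([state_value, action_advantage])
    else:
        # non-dueling fallback: one linear Q-value per action
        X = Dense(action_space, activation="linear", kernel_initializer='he_uniform')(X)

    model = Model(inputs=X_input, outputs=X, name='CartPole_Dueling_DDQN_model')
    model.compile(loss="mse", optimizer=RMSprop(lr=0.00025, rho=0.95, epsilon=0.01), metrics=["accuracy"])
    return model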