horoiwa / call_env.py
Calling the CartPole-v1 environment
import gym
from gym import wrappers

ENV_NAME = "CartPole-v1"

env = gym.make(ENV_NAME)
# Record a video of every 25th episode under ./log
env = wrappers.Monitor(env, "./log", force=True,
                       video_callable=(lambda ep: ep % 25 == 0))

# DQNAgent is defined in agent.py below
agent = DQNAgent(env=env)
agent.play(episodes=400)
horoiwa / agent.py
DQNAgent
class DQNAgent:
    """ ==== snip ==== """

    def play(self, episodes):
        total_rewards = []
        for n in range(episodes):
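The loop body is cut off here. A sketch of how a DQN episode loop typically continues, assuming an epsilon-greedy `sample_action` helper, a `self.replay_buffer`, and an `Experience` that also carries `next_state` and `done` (the minibatch unpacking in `update_qnetwork` below implies those fields); none of this is verbatim from the gist:

            # ...continuing play(); a sketch of the omitted episode loop
            state = self.env.reset()
            total_reward, done = 0, False
            while not done:
                action = self.sample_action(state)  # epsilon-greedy (assumed helper)
                next_state, reward, done, _ = self.env.step(action)
                self.replay_buffer.push(
                    Experience(state, action, reward, next_state, done))
                total_reward += reward
                state = next_state
                # Only train once enough transitions are stored
                if len(self.replay_buffer.buffer) >= self.BATCH_SIZE:
                    self.update_qnetwork()
            total_rewards.append(total_reward)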
import collections
from dataclasses import dataclass

import numpy as np


@dataclass
class Experience:
    state: np.ndarray
    action: int
    reward: float
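The fragment stops at `reward`. A replay buffer that would match both the `import collections` above and the `get_minibatch(self.BATCH_SIZE)` call in `update_qnetwork` below might look like this; a sketch, again assuming `next_state` and `done` fields on `Experience`, with an illustrative deque capacity:

import collections
import random

import numpy as np


class ReplayBuffer:
    """Sketch of a FIFO experience store; not the gist's actual code."""

    def __init__(self, max_len=10000):
        self.buffer = collections.deque(maxlen=max_len)

    def push(self, exp):
        self.buffer.append(exp)

    def get_minibatch(self, batch_size):
        # Uniform sample, split into the column arrays that
        # update_qnetwork unpacks below
        batch = random.sample(self.buffer, batch_size)
        states = np.vstack([e.state for e in batch])
        actions = np.array([e.action for e in batch])
        rewards = np.array([e.reward for e in batch], dtype=np.float32)
        next_states = np.vstack([e.next_state for e in batch])
        dones = np.array([e.done for e in batch], dtype=np.float32)
        return states, actions, rewards, next_states, dones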
horoiwa / qnet.py
Q-function
import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as kl


class QNetwork(tf.keras.Model):

    def __init__(self, action_space, lr=0.001):
        super(QNetwork, self).__init__()
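The layer definitions are cut off above. A minimal sketch of how such a network is commonly filled in for CartPole-scale inputs; the two hidden Dense layers, their widths, and the Adam optimizer are assumptions, not the gist's code:

import tensorflow as tf
import tensorflow.keras.layers as kl


class QNetwork(tf.keras.Model):

    def __init__(self, action_space, lr=0.001):
        super().__init__()
        self.action_space = action_space
        # Hidden widths are illustrative choices
        self.dense1 = kl.Dense(64, activation="relu")
        self.dense2 = kl.Dense(64, activation="relu")
        self.out = kl.Dense(action_space)  # one Q-value per action
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    def call(self, x):
        x = self.dense1(x)
        x = self.dense2(x)
        return self.out(x)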
class DQNAgent:
    """Snip: other methods"""

    def update_qnetwork(self):
        (states, actions, rewards,
         next_states, dones) = self.get_minibatch(self.BATCH_SIZE)
        # The target network provides the bootstrap value for the next states
        next_Qs = np.max(self.target_network.predict(next_states), axis=1)
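The method is cut off after `next_Qs`. The standard DQN update would continue roughly as follows; a sketch in which `self.gamma`, `self.qnetwork`, and the `action_space`/`optimizer` attributes on the network are assumptions about the omitted code:

        # ...continuing update_qnetwork(); a sketch of the omitted remainder
        # Bellman target: r + gamma * max_a' Q_target(s', a'); no bootstrap at terminals
        target_Qs = (rewards + (1 - dones) * self.gamma * next_Qs).astype(np.float32)

        onehot_actions = tf.one_hot(actions, self.qnetwork.action_space)
        with tf.GradientTape() as tape:
            Qs = self.qnetwork(tf.convert_to_tensor(states, dtype=tf.float32))
            # Q-values of the actions actually taken in the sampled transitions
            taken_Qs = tf.reduce_sum(Qs * onehot_actions, axis=1)
            loss = tf.reduce_mean(tf.square(target_Qs - taken_Qs))

        grads = tape.gradient(loss, self.qnetwork.trainable_variables)
        self.qnetwork.optimizer.apply_gradients(
            zip(grads, self.qnetwork.trainable_variables))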
import threading

import tensorflow as tf
import gym


class GlobalCounter:
    """Step counter shared across the A3C worker threads."""

    def __init__(self):
        self.n = 0
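With several worker threads bumping the counter, an unguarded `self.n += 1` can race. A lock-guarded variant is a common choice; the `increment` method and the lock are an assumption, not shown in the gist:

import threading


class GlobalCounter:
    """Global step counter with a lock so A3C workers can increment it safely."""

    def __init__(self):
        self.n = 0
        self._lock = threading.Lock()

    def increment(self):
        with self._lock:
            self.n += 1
            return self.n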
import tensorflow as tf
import tensorflow.keras.layers as kl
import tensorflow_probability as tfp
import numpy as np


class ActorCriticNet(tf.keras.Model):

    def __init__(self, action_space=2):
        super(ActorCriticNet, self).__init__()
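The class body is cut off. Given that `compute_loss` below unpacks `values, logits = self.local_ACNet(states)`, a shared-trunk network with a value head and a logits head fits; a minimal sketch in which the layer width and the `sample_action` helper are assumptions:

import tensorflow as tf
import tensorflow.keras.layers as kl
import tensorflow_probability as tfp


class ActorCriticNet(tf.keras.Model):

    def __init__(self, action_space=2):
        super().__init__()
        self.dense1 = kl.Dense(64, activation="relu")  # shared trunk (width assumed)
        self.values = kl.Dense(1)                      # critic head: V(s)
        self.logits = kl.Dense(action_space)           # actor head: unnormalized pi(a|s)

    def call(self, x):
        x = self.dense1(x)
        # Return order matches `values, logits = self.local_ACNet(states)` in the gist
        return self.values(x), self.logits(x)

    def sample_action(self, state):
        _, logits = self(tf.convert_to_tensor(state[None, :], dtype=tf.float32))
        dist = tfp.distributions.Categorical(logits=logits)
        return int(dist.sample()[0])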
class A3CAgent:
    """ ==== snip ==== """

    def play(self, coord):
        self.total_reward = 0
        self.state = self.env.reset()
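The method breaks off after the reset. The `coord` argument is presumably a `tf.train.Coordinator`, so the worker loop would continue along these lines; a sketch in which `sample_action`, `self.global_counter`, and `self.MAX_STEPS` are assumptions, not names from the gist:

        # ...continuing play(); a sketch of the omitted worker loop
        while not coord.should_stop():
            action = self.local_ACNet.sample_action(self.state)
            next_state, reward, done, _ = self.env.step(action)

            self.total_reward += reward
            self.state = next_state
            self.global_counter.increment()

            if done:
                self.total_reward = 0
                self.state = self.env.reset()

            # Ask every worker to stop once the global step budget is spent
            if self.global_counter.n >= self.MAX_STEPS:
                coord.request_stop()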
class A3CAgent:
    """ ==== snip ==== """

    def compute_loss(self, states, actions, discounted_rewards):
        states = tf.convert_to_tensor(
            np.vstack(states), dtype=tf.float32)
        values, logits = self.local_ACNet(states)
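The excerpt ends right after the forward pass. The standard A3C loss combines a squared advantage for the critic, an advantage-weighted log-probability term for the actor, and an entropy bonus; a sketch of how the method might continue, where the 0.5 value weight and 0.01 entropy coefficient are assumptions:

        # ...continuing compute_loss(); a sketch of the omitted remainder
        discounted_rewards = tf.convert_to_tensor(
            np.vstack(discounted_rewards), dtype=tf.float32)

        # Critic: squared advantage (n-step return minus value estimate)
        advantages = discounted_rewards - values
        value_loss = tf.reduce_mean(tf.square(advantages))

        # Actor: advantage-weighted negative log-prob of the taken actions;
        # stop_gradient keeps policy gradients out of the critic path
        dist = tfp.distributions.Categorical(logits=logits)
        log_probs = dist.log_prob(np.array(actions))
        policy_loss = -tf.reduce_mean(
            log_probs * tf.stop_gradient(tf.squeeze(advantages)))

        # Entropy bonus encourages exploration
        entropy = tf.reduce_mean(dist.entropy())

        return value_loss * 0.5 + policy_loss - 0.01 * entropy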
import gym


def envfunc():
    env = gym.make("BreakoutDeterministic-v4")
    return env


class A2CAgent:
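The page cuts off at the class declaration. Since `envfunc` returns a fresh environment on each call, the usual A2C pattern is to hold several copies and step them in lockstep; a sketch of how the class might begin, where the `num_envs` count and method names are assumptions:

    # ...a sketch of how A2CAgent might start (not the gist's code)
    def __init__(self, envfunc, num_envs=8):
        # One independent Breakout instance per parallel rollout
        self.envs = [envfunc() for _ in range(num_envs)]

    def reset_all(self):
        # Initial observations, one per environment
        return [env.reset() for env in self.envs]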