pocokhc/gym_keras_rl_sample.png

## gym_keras_rl_sample.png

      
    Raw
  

              gym_keras_rl_sample.png
            
          
## gym_keras_rl_sample.py
import gym
from keras.models import Model
from keras.layers import *
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
import matplotlib.pyplot as plt

# Create the game environment and report its basic properties.
env = gym.make('CartPole-v0')
print("action_space      : " + str(env.action_space))
print("observation_space : " + str(env.observation_space))
print("reward_range      : " + str(env.reward_range))

# Input and output sizes of the network.
# The input shape is the observation shape prefixed with the window length;
# the same window_length is passed to the replay memory below so both agree.
window_length = 1
input_shape = (window_length,) + env.observation_space.shape
nb_actions = env.action_space.n

# Build the NN model: flatten the windowed observation, pass it through
# three 16-unit ReLU layers, and emit one linear output per action.
c = input_ = Input(input_shape)
c = Flatten()(c)
c = Dense(16, activation='relu')(c)
c = Dense(16, activation='relu')(c)
c = Dense(16, activation='relu')(c)
c = Dense(nb_actions, activation='linear')(c)
model = Model(input_, c)
# To inspect the bare network before it is handed to the agent, call
# model.summary() here.

# Preparation for using DQNAgent: replay memory, action-selection policy,
# and the agent itself, compiled with a default-configured Adam optimizer.
memory = SequentialMemory(limit=50000, window_length=window_length)
policy = BoltzmannQPolicy()
agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
agent.compile(Adam())

# Show the final model held by the agent.
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit an extra "None" line).
agent.model.summary()

# Training.
print("--- start ---")
print("'Ctrl + C' is stop.")
history = agent.fit(env, nb_steps=50000, visualize=False, verbose=1)

# Show the results: steps per episode on top, reward per episode below.
plt.subplot(2,1,1)
plt.plot(history.history["nb_episode_steps"])
plt.ylabel("step")

plt.subplot(2,1,2)
plt.plot(history.history["episode_reward"])
plt.xlabel("episode")
plt.ylabel("reward")

plt.show()

# Watch the trained agent play a few rendered episodes.
agent.test(env, nb_episodes=5, visualize=True)

# Release the environment's resources (e.g. the render window opened by the
# visualized test run above).
env.close()
	import gym
	from keras.models import Model
	from keras.layers import *
	from keras.optimizers import Adam
	from rl.agents.dqn import DQNAgent
	from rl.policy import BoltzmannQPolicy
	from rl.memory import SequentialMemory
	import matplotlib.pyplot as plt

	# Create the game environment and report its basic properties.
	env = gym.make('CartPole-v0')
	print("action_space : " + str(env.action_space))
	print("observation_space : " + str(env.observation_space))
	print("reward_range : " + str(env.reward_range))

	# Input and output sizes of the network.
	# The input shape is the observation shape prefixed with the window length;
	# the same window_length is passed to the replay memory below so both agree.
	window_length = 1
	input_shape = (window_length,) + env.observation_space.shape
	nb_actions = env.action_space.n

	# Build the NN model: flatten the windowed observation, pass it through
	# three 16-unit ReLU layers, and emit one linear output per action.
	c = input_ = Input(input_shape)
	c = Flatten()(c)
	c = Dense(16, activation='relu')(c)
	c = Dense(16, activation='relu')(c)
	c = Dense(16, activation='relu')(c)
	c = Dense(nb_actions, activation='linear')(c)
	model = Model(input_, c)
	# To inspect the bare network before it is handed to the agent, call
	# model.summary() here.

	# Preparation for using DQNAgent: replay memory, action-selection policy,
	# and the agent itself, compiled with a default-configured Adam optimizer.
	memory = SequentialMemory(limit=50000, window_length=window_length)
	policy = BoltzmannQPolicy()
	agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
	agent.compile(Adam())

	# Show the final model held by the agent.
	# summary() prints the table itself and returns None, so it must not be
	# wrapped in print() (that would emit an extra "None" line).
	agent.model.summary()

	# Training.
	print("--- start ---")
	print("'Ctrl + C' is stop.")
	history = agent.fit(env, nb_steps=50000, visualize=False, verbose=1)

	# Show the results: steps per episode on top, reward per episode below.
	plt.subplot(2,1,1)
	plt.plot(history.history["nb_episode_steps"])
	plt.ylabel("step")

	plt.subplot(2,1,2)
	plt.plot(history.history["episode_reward"])
	plt.xlabel("episode")
	plt.ylabel("reward")

	plt.show()

	# Watch the trained agent play a few rendered episodes.
	agent.test(env, nb_episodes=5, visualize=True)

	# Release the environment's resources (e.g. the render window opened by the
	# visualized test run above).
	env.close()