# Colab-only IPython magic: pin the runtime to TensorFlow 1.x.
# NOTE(review): this line is not valid plain Python — it only works in a
# Colab/IPython notebook cell.
%tensorflow_version 1.x
import tensorflow
# Print the TF version actually loaded, to confirm the 1.x pin took effect.
print(tensorflow.__version__)
# -*- coding: utf-8 -*-
"""Lecture 4 Q learning"""
# NOTE(review): trailing " | |" markdown-table residue removed from every
# line — the original paste artifacts made each statement a SyntaxError.
import gym
import numpy as np
import random
import matplotlib.pyplot as plt
from gym.envs.registration import register
# -*- coding: utf-8 -*-
"""Lecture 3 Dummy Q-learning (table)"""
# NOTE(review): trailing " | |" markdown-table residue removed — the paste
# artifacts made each line a SyntaxError.
import numpy as np
import gym
from gym.envs.registration import register
import random as pr
import matplotlib.pyplot as plt
# -*- coding: utf-8 -*-
"""Lecture 5 Q learning Nondeterministic"""
# NOTE(review): trailing " | |" markdown-table residue removed — the paste
# artifacts made each line a SyntaxError.
import numpy as np
import gym
import random
import matplotlib.pyplot as plt
import logging
import logging.handlers

# Application logger: named channel, capture everything from DEBUG up.
log = logging.getLogger('snowdeer_log')
log.setLevel(logging.DEBUG)

# Line format: "[LEVEL] (file.py:123) > message".
formatter = logging.Formatter('[%(levelname)s] (%(filename)s:%(lineno)d) > %(message)s')

# Two sinks: a file next to the script and stderr.
# NOTE(review): within this chunk the formatter is never set on either
# handler and neither handler is added to `log` via addHandler() — records
# will go nowhere unless that wiring happens later in the file. TODO confirm.
fileHandler = logging.FileHandler('./log.txt')
streamHandler = logging.StreamHandler()
import numpy as np

# NOTE(review): trailing " | |" markdown-table residue removed — the paste
# artifacts made each line a SyntaxError.
# Reward received on entering each of the 7 states of a linear walk:
# the left terminal pays -5, the right terminal pays +5, interior states 0.
state_rewards = [-5, 0, 0, 0, 0, 0, 5]
# True for the two terminal states (indices 0 and 6) where an episode ends.
final_state = [True, False, False, False, False, False, True]
Q_values = [[0.0, 0.0], | |
[0.0, 0.0], | |
[0.0, 0.0], | |
[0.0, 0.0], | |
[0.0, 0.0], |
import numpy as np


# Reward function per action (arm pull).
def pull_bandit_arm(bandits, bandit_number):
    """Pull the arm at index *bandit_number* and return the obtained reward.

    *bandits* holds each arm's win probability; a uniform draw in [0, 1)
    that lands at or below that probability counts as a win.
    Returns 1 on a win, 0 otherwise.
    """
    result = np.random.uniform()
    return int(result <= bandits[bandit_number])  # returns 0 or 1


# Function that chooses the action
# (reflects the exploration rate)
# NOTE(review): trailing " | |" markdown-table residue removed — the paste
# artifacts made each line a SyntaxError.
import gym
import random
import numpy as np
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.optimizers import Adam

# Build the CartPole environment and record the size of its observation
# vector (input dimension for the network built later in the file).
env = gym.make('CartPole-v1')
states = env.observation_space.shape[0]
# NOTE(review): trailing " | |" markdown-table residue removed — the paste
# artifacts made each line a SyntaxError.
import gym
import numpy as np
import random
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from collections import deque
class DQN: |