Skip to content

Instantly share code, notes, and snippets.

@livoras
Created July 22, 2018 12:08
Show Gist options
  • Save livoras/9e77f022a57e15d837859267270e1a79 to your computer and use it in GitHub Desktop.
Save livoras/9e77f022a57e15d837859267270e1a79 to your computer and use it in GitHub Desktop.
Using Q-Learning to solve MountainCar-v0
import random
import gym
import sys
import time
import pickle
import os
# Environment instance shared by every function in this script.
env = gym.make('MountainCar-v0')
#####
# Q-table: maps a discretized state tuple to a list of per-action Q-values,
# e.g. { (1, 3): [actions], (3, 2): [actions], etc... }
qTable = {}
epsilon = 0.2 # exploration rate: chance of picking a random action
alpha = 0.5 # learning rate
gamma = 0.8 # discount factor
MAX_EPISODE = 100000  # run() stops once the episode counter i reaches this
FILE_TO_SAVE = "data2"  # pickle file the Q-table is loaded from and saved to
lastSaveLen = 0  # table size at the last checkpoint (maintained by updateQ)
isSuccess = 0  # success counter maintained by run()
i = 0  # episode counter maintained by run()
# Named action indices. NOTE(review): presumably MountainCar-v0's action
# encoding; nothing else in this file references them.
ACTION_LEFT = 0
ACTION_STAY = 1
ACTION_RIGHT = 2
def run():
    """Train the Q-table on MountainCar-v0 with epsilon-greedy Q-learning.

    Loads any previously saved table from FILE_TO_SAVE, then plays episodes
    until the module-level episode counter ``i`` reaches MAX_EPISODE. Each
    step picks an action with getActionByState, applies it to ``env`` and
    passes the transition to updateQ. A note is printed for every 1000
    successful episodes, and a one-line success/failure summary is printed
    once training ends.

    Written against the classic gym API, where ``env.reset()`` returns the
    observation and ``env.step()`` returns a 4-tuple.

    Mutates the module-level ``i``, ``isSuccess`` and ``qTable``.
    """
    global i, isSuccess, qTable
    qTable = loadObj(FILE_TO_SAVE)
    # isSuccess is reset to 0 after every 1000 successes, so it cannot tell
    # "no success at all" from "exactly N * 1000 successes"; keep a separate
    # tally for the final report.
    totalSuccesses = 0
    while i < MAX_EPISODE:
        state = discretizeState(env.reset())
        done = False
        while not done:
            # Call env.render() here to watch the agent while it trains.
            action = getActionByState(state)
            observation, reward, done, _ = env.step(action)
            newState = discretizeState(observation)
            updateQ(state, action, newState, reward)
            # Move on to the next state.
            state = newState
            # Success ends the episode. Test the raw position: the rounded
            # one already reads 0.5 from roughly 0.495 upwards, i.e. before
            # the car has actually reached the goal.
            if observation[0] >= 0.5:
                isSuccess = isSuccess + 1
                totalSuccesses += 1
                if isSuccess % 1000 == 0:
                    print("1000 Successfully! count: =>")
                    isSuccess = 0
                break
        i = i + 1
    if totalSuccesses:
        print("成功的男人!")
    else:
        print("失败的男人,一千回合都没有一次成功!", i)
def getActionByState(state):
    """Pick an action for ``state`` using an epsilon-greedy policy.

    Args:
        state: Discretized state tuple used as a key into ``qTable``.

    Returns:
        A random sample from ``env.action_space`` when the state has no
        entry in ``qTable`` yet, or with probability ``epsilon``; otherwise
        the index of the largest Q-value stored for the state (the lowest
        index wins ties).
    """
    actionsQ = qTable.get(state)
    # Unknown state, or time to explore: act randomly. The random draw is
    # skipped entirely for unknown states.
    if actionsQ is None or random.random() <= epsilon:
        return env.action_space.sample()
    # Exploit: return the action with the highest Q-value.
    return actionsQ.index(max(actionsQ))
# Discretize the state to shrink the state space.
def discretizeState(state, positionDigits=2, velocityDigits=3):
    """Round the first two components of ``state`` into a hashable tuple.

    Args:
        state: Indexable observation; per the gym documentation a
            MountainCar-v0 observation is ``(position, velocity)``.
        positionDigits: Decimal places kept for ``state[0]``.
        velocityDigits: Decimal places kept for ``state[1]``.

    Returns:
        ``(round(state[0], positionDigits), round(state[1], velocityDigits))``.
    """
    return (round(state[0], positionDigits), round(state[1], velocityDigits))
# Update the Q-table.
def updateQ(state, action, nextState, reward):
    """Apply one Q-learning update for ``(state, action)`` and checkpoint.

    The new value is
    ``(1 - alpha) * Q(state, action) + alpha * (reward + gamma * max Q(nextState))``.
    After storing it, the whole table is saved to FILE_TO_SAVE whenever its
    size is a multiple of 100 that has not been saved yet.

    Args:
        state: Discretized state the action was taken in.
        action: Index of the action taken.
        nextState: Discretized state reached after the action.
        reward: Reward received for the transition.
    """
    global lastSaveLen
    stateActionsQ = getActionsQByState(state)
    nextStateActionsQ = getActionsQByState(nextState)
    currentStateQ = stateActionsQ[action]
    # Read the bootstrap value before mutating: when state == nextState both
    # names can refer to the same list.
    maxNextStateQ = max(nextStateActionsQ)
    stateActionsQ[action] = (
        (1 - alpha) * currentStateQ + alpha * (reward + gamma * maxNextStateQ)
    )
    qTable[state] = stateActionsQ
    lenOfTable = len(qTable)
    # Compare by value (==), not identity: `is 0` relies on an interpreter
    # implementation detail and raises a SyntaxWarning on Python 3.8+.
    if lenOfTable % 100 == 0 and lastSaveLen != lenOfTable:
        saveObj(qTable, FILE_TO_SAVE)
        print("Save done, table length", lenOfTable)
        lastSaveLen = lenOfTable
def getActionsQByState(state):
    """Return the per-action Q-values stored for ``state``.

    Args:
        state: Discretized state tuple used as a key into ``qTable``.

    Returns:
        The list held in ``qTable`` (the same object, so mutating it updates
        the table), or a new ``[0, 0, 0]`` list when the state is unknown.
        Unknown states are not inserted into the table.
    """
    # The default list literal is evaluated on every call, so callers never
    # share a zero list.
    return qTable.get(state, [0, 0, 0])
def saveObj(obj, name):
    """Pickle ``obj`` to the file ``name``, replacing any previous content.

    The data is first written to ``<name>.tmp`` and then moved over the
    target with ``os.replace``, so interrupting the program in the middle of
    a save leaves the previous file intact instead of a truncated pickle.

    Args:
        obj: Any picklable object.
        name: Destination file path.
    """
    tmpName = str(name) + ".tmp"
    with open(tmpName, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
    os.replace(tmpName, name)
def loadObj(name):
    """Load a pickled object from the file ``name``.

    Args:
        name: Path of the pickle file.

    Returns:
        The unpickled object, or an empty dict when the file does not exist.

    Raises:
        Whatever ``open`` or ``pickle.load`` raise for an unreadable or
        corrupt file; only a missing file is treated as "nothing saved yet".
    """
    # Open first and handle the miss, rather than checking for existence and
    # then opening, which leaves a window for the file to disappear.
    try:
        f = open(name, 'rb')
    except FileNotFoundError:
        return {}
    with f:
        # SECURITY: pickle.load can execute arbitrary code; only load files
        # this script wrote itself.
        return pickle.load(f)
# Script entry point: training starts as soon as this file is executed
# (or imported, since the call is not guarded).
run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment