# livoras/MountainCar-v0-q-learning.py

## MountainCar-v0-q-learning.py
import random
import gym
import sys
import time
import pickle
import os
# Named constants for the three action indices; nothing visible in this file
# references them.
ACTION_LEFT, ACTION_STAY, ACTION_RIGHT = range(3)

# Training length and checkpoint file.
MAX_EPISODE = 100000
FILE_TO_SAVE = "data2"

# Q-learning hyperparameters.
epsilon = 0.2  # exploration factor
alpha = 0.5  # learning factor
gamma = 0.8  # discount factor

# Q-table keyed by discretized state, one list of Q-values per state:
# { (1, 3): [actions], (3, 2): [actions], etc... }
qTable = {}

# Mutable counters that the functions below update through `global`.
i = 0  # episodes finished so far
isSuccess = 0  # successes counted since the counter was last reset
lastSaveLen = 0  # size of qTable at the most recent save

env = gym.make('MountainCar-v0')

def run():
  """Train the Q-table on the global `env` until `MAX_EPISODE` episodes have run.

  Loads any previously saved table from `FILE_TO_SAVE`, then plays episodes:
  pick an action with `getActionByState`, step the environment, and feed the
  transition to `updateQ`. An episode ends when the environment reports
  `done` or the first component of the discretized state reaches 0.5.

  Side effects: rebinds the globals `qTable`, `i` and `isSuccess`, and prints
  progress messages plus one final summary line.
  """
  global i, isSuccess, qTable
  qTable = loadObj(FILE_TO_SAVE)
  # `isSuccess` is zeroed after every 1000th success, so on its own it cannot
  # tell whether any episode ever succeeded; remember that separately.
  everSucceeded = False
  while i < MAX_EPISODE:
    # NOTE(review): assumes the classic Gym API, where reset() returns the bare
    # observation and step() returns a 4-tuple -- confirm the installed version.
    state = discretizeState(env.reset())
    done = False
    while not done:
      # Rendering is disabled; uncomment to render once isSuccess exceeds 500.
      # if isSuccess > 500:
      #   env.render()

      # Act, observe, learn.
      action = getActionByState(state)
      newState, reward, done, info = env.step(action)
      newState = discretizeState(newState)
      updateQ(state, action, newState, reward)
      # Move on to the next state.
      state = newState

      # First state component reached 0.5: count a success and end the episode.
      if newState[0] >= 0.5:
        everSucceeded = True
        isSuccess = isSuccess + 1
        if isSuccess % 1000 == 0:
          print("1000 Successfully! count: =>")
          isSuccess = 0
        break
    i = i + 1
  if everSucceeded or isSuccess:
    print("成功的男人！")
  else:
    print("失败的男人，一千回合都没有一次成功！", i)

def getActionByState(state):
  """Pick an action for `state` with an epsilon-greedy rule.

  Returns a random sample from `env.action_space` when `state` has no entry
  in `qTable`, or when a fresh `random.random()` draw is at most `epsilon`.
  Otherwise returns the index of the largest Q-value stored for `state`
  (the first such index on ties).
  """
  # Act greedily only for a known state whose draw exceeds epsilon; no random
  # number is drawn at all for an unknown state.
  if state in qTable and random.random() > epsilon:
    qValues = qTable[state]
    return qValues.index(max(qValues))
  return env.action_space.sample()

def discretizeState(state):
  """Round a raw state into a coarse tuple to shrink the state space.

  Returns a 2-tuple: `state[0]` rounded to 2 decimal places and `state[1]`
  rounded to 3 decimal places. Any further components are ignored.
  """
  first = round(state[0], 2)
  second = round(state[1], 3)
  return (first, second)

def updateQ(state, action, nextState, reward):
  """Apply one Q-learning update for a transition and checkpoint the table.

  Args:
    state: discretized state the action was taken in (used as a `qTable` key).
    action: index of the action that was taken.
    nextState: discretized state reached after the action.
    reward: reward received for the transition.

  Side effects: stores the updated Q-values in `qTable[state]`. Whenever the
  table size is a multiple of 100 that has not been saved yet, writes the
  table to `FILE_TO_SAVE`, prints a message and updates `lastSaveLen`.
  """
  global lastSaveLen
  stateActionsQ = getActionsQByState(state)
  nextStateActionsQ = getActionsQByState(nextState)

  currentStateQ = stateActionsQ[action]
  maxNextStateQ = max(nextStateActionsQ)

  # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max Q(s', .))
  newStateQ = (1 - alpha) * currentStateQ + alpha * (reward + gamma * maxNextStateQ)
  stateActionsQ[action] = newStateQ

  # Required for unseen states, whose Q-value list is not in the table yet.
  qTable[state] = stateActionsQ
  lenOfTable = len(qTable)
  # Compare by value: `is 0` tests object identity, which only works by
  # accident of CPython's small-int caching and raises a SyntaxWarning.
  if lenOfTable % 100 == 0 and lastSaveLen != lenOfTable:
    saveObj(qTable, FILE_TO_SAVE)
    print("Save done, table length", lenOfTable)
    lastSaveLen = lenOfTable

def getActionsQByState(state):
  """Return the Q-value list stored for `state` in `qTable`.

  For a state with no entry, returns a new `[0, 0, 0]` list; the table itself
  is not modified.
  """
  return qTable.get(state, [0, 0, 0])

def saveObj(obj, name):
  """Pickle `obj` to the file `name`, replacing any existing file in one step.

  The data is first written to `name` + ".tmp" and then moved over `name`
  with `os.replace`, so an interruption in the middle of a save cannot leave
  a truncated pickle behind under `name`.

  Args:
    obj: any picklable object.
    name: destination file path (a string or `os.PathLike`).
  """
  tmpName = os.fspath(name) + ".tmp"
  with open(tmpName, 'wb') as f:
    pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
  # Swap the file in only after it is completely written and closed.
  os.replace(tmpName, name)

def loadObj(name):
  """Load a pickled object from the file `name`.

  Returns an empty dict when `name` does not exist.

  NOTE: unpickling can execute arbitrary code, so only load files that this
  script wrote itself.
  """
  if os.path.exists(name):
    with open(name, 'rb') as source:
      return pickle.load(source)
  return {}

# Start training as soon as this file is executed (or imported).
run()
	import random
	import gym
	import sys
	import time
	import pickle
	import os
	# Named constants for the three action indices; nothing visible in this file
	# references them.
	ACTION_LEFT, ACTION_STAY, ACTION_RIGHT = range(3)

	# Training length and checkpoint file.
	MAX_EPISODE = 100000
	FILE_TO_SAVE = "data2"

	# Q-learning hyperparameters.
	epsilon = 0.2  # exploration factor
	alpha = 0.5  # learning factor
	gamma = 0.8  # discount factor

	# Q-table keyed by discretized state, one list of Q-values per state:
	# { (1, 3): [actions], (3, 2): [actions], etc... }
	qTable = {}

	# Mutable counters that the functions below update through `global`.
	i = 0  # episodes finished so far
	isSuccess = 0  # successes counted since the counter was last reset
	lastSaveLen = 0  # size of qTable at the most recent save

	env = gym.make('MountainCar-v0')

	def run():
	  """Train the Q-table on the global `env` until `MAX_EPISODE` episodes have run.

	  Loads any previously saved table from `FILE_TO_SAVE`, then plays episodes:
	  pick an action with `getActionByState`, step the environment, and feed the
	  transition to `updateQ`. An episode ends when the environment reports
	  `done` or the first component of the discretized state reaches 0.5.

	  Side effects: rebinds the globals `qTable`, `i` and `isSuccess`, and prints
	  progress messages plus one final summary line.
	  """
	  global i, isSuccess, qTable
	  qTable = loadObj(FILE_TO_SAVE)
	  # `isSuccess` is zeroed after every 1000th success, so on its own it cannot
	  # tell whether any episode ever succeeded; remember that separately.
	  everSucceeded = False
	  while i < MAX_EPISODE:
	    # NOTE(review): assumes the classic Gym API, where reset() returns the bare
	    # observation and step() returns a 4-tuple -- confirm the installed version.
	    state = discretizeState(env.reset())
	    done = False
	    while not done:
	      # Rendering is disabled; uncomment to render once isSuccess exceeds 500.
	      # if isSuccess > 500:
	      #   env.render()

	      # Act, observe, learn.
	      action = getActionByState(state)
	      newState, reward, done, info = env.step(action)
	      newState = discretizeState(newState)
	      updateQ(state, action, newState, reward)
	      # Move on to the next state.
	      state = newState

	      # First state component reached 0.5: count a success and end the episode.
	      if newState[0] >= 0.5:
	        everSucceeded = True
	        isSuccess = isSuccess + 1
	        if isSuccess % 1000 == 0:
	          print("1000 Successfully! count: =>")
	          isSuccess = 0
	        break
	    i = i + 1
	  if everSucceeded or isSuccess:
	    print("成功的男人！")
	  else:
	    print("失败的男人，一千回合都没有一次成功！", i)

	def getActionByState(state):
	  """Pick an action for `state` with an epsilon-greedy rule.

	  Returns a random sample from `env.action_space` when `state` has no entry
	  in `qTable`, or when a fresh `random.random()` draw is at most `epsilon`.
	  Otherwise returns the index of the largest Q-value stored for `state`
	  (the first such index on ties).
	  """
	  # Act greedily only for a known state whose draw exceeds epsilon; no random
	  # number is drawn at all for an unknown state.
	  if state in qTable and random.random() > epsilon:
	    qValues = qTable[state]
	    return qValues.index(max(qValues))
	  return env.action_space.sample()

	def discretizeState(state):
	  """Round a raw state into a coarse tuple to shrink the state space.

	  Returns a 2-tuple: `state[0]` rounded to 2 decimal places and `state[1]`
	  rounded to 3 decimal places. Any further components are ignored.
	  """
	  first = round(state[0], 2)
	  second = round(state[1], 3)
	  return (first, second)

	def updateQ(state, action, nextState, reward):
	  """Apply one Q-learning update for a transition and checkpoint the table.

	  Args:
	    state: discretized state the action was taken in (used as a `qTable` key).
	    action: index of the action that was taken.
	    nextState: discretized state reached after the action.
	    reward: reward received for the transition.

	  Side effects: stores the updated Q-values in `qTable[state]`. Whenever the
	  table size is a multiple of 100 that has not been saved yet, writes the
	  table to `FILE_TO_SAVE`, prints a message and updates `lastSaveLen`.
	  """
	  global lastSaveLen
	  stateActionsQ = getActionsQByState(state)
	  nextStateActionsQ = getActionsQByState(nextState)

	  currentStateQ = stateActionsQ[action]
	  maxNextStateQ = max(nextStateActionsQ)

	  # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max Q(s', .))
	  newStateQ = (1 - alpha) * currentStateQ + alpha * (reward + gamma * maxNextStateQ)
	  stateActionsQ[action] = newStateQ

	  # Required for unseen states, whose Q-value list is not in the table yet.
	  qTable[state] = stateActionsQ
	  lenOfTable = len(qTable)
	  # Compare by value: `is 0` tests object identity, which only works by
	  # accident of CPython's small-int caching and raises a SyntaxWarning.
	  if lenOfTable % 100 == 0 and lastSaveLen != lenOfTable:
	    saveObj(qTable, FILE_TO_SAVE)
	    print("Save done, table length", lenOfTable)
	    lastSaveLen = lenOfTable

	def getActionsQByState(state):
	  """Return the Q-value list stored for `state` in `qTable`.

	  For a state with no entry, returns a new `[0, 0, 0]` list; the table itself
	  is not modified.
	  """
	  return qTable.get(state, [0, 0, 0])

	def saveObj(obj, name):
	  """Pickle `obj` to the file `name`, replacing any existing file in one step.

	  The data is first written to `name` + ".tmp" and then moved over `name`
	  with `os.replace`, so an interruption in the middle of a save cannot leave
	  a truncated pickle behind under `name`.

	  Args:
	    obj: any picklable object.
	    name: destination file path (a string or `os.PathLike`).
	  """
	  tmpName = os.fspath(name) + ".tmp"
	  with open(tmpName, 'wb') as f:
	    pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
	  # Swap the file in only after it is completely written and closed.
	  os.replace(tmpName, name)

	def loadObj(name):
	  """Load a pickled object from the file `name`.

	  Returns an empty dict when `name` does not exist.

	  NOTE: unpickling can execute arbitrary code, so only load files that this
	  script wrote itself.
	  """
	  if os.path.exists(name):
	    with open(name, 'rb') as source:
	      return pickle.load(source)
	  return {}

	# Start training as soon as this file is executed (or imported).
	run()