@KarenWest
Created April 25, 2013 19:02
def computeQValueFromValues(self, state, action):
    """
    Compute the Q-value of action in state from the
    value function stored in self.values.
    """
    "*** YOUR CODE HERE ***"
    # For this state and action, get the list of (nextState, probability)
    # pairs for the legal transitions out of this state.
    # If the action is 'north' or 'south', this returns (northState, 1 - self.noise)
    # followed by (westState, self.noise / 2.0) and (eastState, self.noise / 2.0);
    # similarly for 'east' or 'west'. If a transition is blocked, the returned
    # next state is the same state that was passed in.
    if self.mdp.isTerminal(state):
        print "terminal state - returning None from computeQValueFromValues()"
        return None
    qVal = 0.0
    transitionStatesAndProbs = self.mdp.getTransitionStatesAndProbs(state, action)
    for newState, prob in transitionStatesAndProbs:
        # Accumulate the expected discounted value over all successor states.
        value = self.values[newState]
        reward = self.mdp.getReward(state, action, newState)
        qVal += prob * (reward + self.discount * value)
    return qVal
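For reference, the loop above is the one-step Bellman backup Q(s, a) = sum over s' of T(s, a, s') * (R(s, a, s') + discount * V(s')). A minimal standalone sketch with made-up numbers (the transition list, values, and discount below are illustrative assumptions, not taken from the project):

# Hypothetical transition list for 'north' with noise = 0.2: the intended
# successor with probability 0.8 and the two perpendicular slips at 0.1 each.
transitions = [((0, 2), 0.8), ((1, 1), 0.1), ((0, 1), 0.1)]
values = {(0, 2): 0.72, (1, 1): 0.43, (0, 1): 0.51}  # assumed V(s') values
discount = 0.9
reward = 0.0  # assumed living reward of 0, as in the default gridworld

qVal = sum(prob * (reward + discount * values[s]) for s, prob in transitions)
print qVal  # 0.8*0.648 + 0.1*0.387 + 0.1*0.459 = 0.603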
def computeActionFromValues(self, state):
    """
    The policy is the best action in the given state
    according to the values currently stored in self.values.
    You may break ties any way you see fit. Note that if
    there are no legal actions, which is the case at the
    terminal state, you should return None.
    """
    "*** YOUR CODE HERE ***"
    # ValueDict[iteration] = (bestAction, bestQVal)
    ValueDict = {}
    totalIter = self.iterations
    for i in range(totalIter):
        ValueDict[i] = (' ', 0)
        # List of possible actions from the current state.
        # NOTE: perhaps do a few iterations on paper!
        actions = self.mdp.getPossibleActions(state)
        if actions != None:
            # Build a dictionary of Q-values for taking each action from the
            # current state, then keep the action with the largest Q-value.
            qValDict = {}
            maxQvalAction = 0
            bestAction = 'north'
            for a in actions:
                # self.getQValue(state, a) delegates to computeQValueFromValues(state, a).
                qVal = self.getQValue(state, a)
                if qVal != None:
                    qValDict[a] = qVal
            for keyAction, qValForAction in qValDict.items():
                if qValForAction > maxQvalAction:
                    bestAction = keyAction
                    maxQvalAction = qValForAction
            self.values[state] = maxQvalAction
            ValueDict[i] = (bestAction, maxQvalAction)
        else:  # actions for this state came back empty
            return None
    bestAction, stateValue = ValueDict[self.iterations - 1]
    return bestAction
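The autograder's KeyError: -1 below is the last line above failing: when self.iterations is 0, the for loop never runs, ValueDict stays empty, and ValueDict[self.iterations - 1] looks up the missing key -1. More fundamentally, computeActionFromValues() is meant to be a pure one-step argmax over the current self.values; it should not loop over self.iterations and should not write to self.values (the iteration loop belongs in the agent's constructor). Also note that getPossibleActions() appears to return an empty tuple, not None, at the terminal state, so the `if actions != None` branch never catches it. A sketch of the intended shape, assuming the standard project API (untested against the grader):

def computeActionFromValues(self, state):
    # No legal actions (e.g. at the terminal state): return None.
    actions = self.mdp.getPossibleActions(state)
    if not actions:
        return None
    # Pure one-step lookahead: argmax over Q(state, a) under current values.
    # Start from -infinity so negative Q-values can still win, and never
    # fall back to a hardcoded action that may be illegal in this state.
    bestAction, bestQVal = None, float('-inf')
    for a in actions:
        qVal = self.computeQValueFromValues(state, a)
        if qVal > bestQVal:
            bestAction, bestQVal = a, qVal
    return bestAction

Initializing maxQvalAction to 0 and bestAction to 'north' in the version above also means any state whose best Q-value is not positive returns 'north' even where 'north' is illegal, which is the likely trigger for the "Illegal action!" crash further down.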
'''
python gridworld.py -a value -i 5
output:
manual mode??
False
RUNNING 1 EPISODES
BEGINNING EPISODE: 1
current state - environment.getCurrentState()
Started in state: (0, 0)
Took action: north
Ended in state: (0, 1)
Got reward: 0.0
current state - environment.getCurrentState()
Started in state: (0, 1)
Took action: north
Ended in state: (0, 2)
Got reward: 0.0
current state - environment.getCurrentState()
Started in state: (0, 2)
Took action: east
Ended in state: (1, 2)
Got reward: 0.0
current state - environment.getCurrentState()
Started in state: (1, 2)
Took action: east
Ended in state: (2, 2)
Got reward: 0.0
current state - environment.getCurrentState()
Started in state: (2, 2)
Took action: east
Ended in state: (2, 1)
Got reward: 0.0
current state - environment.getCurrentState()
Started in state: (2, 1)
Took action: north
Ended in state: (2, 2)
Got reward: 0.0
current state - environment.getCurrentState()
Started in state: (2, 2)
Took action: east
Ended in state: (3, 2)
Got reward: 0.0
current state - environment.getCurrentState()
Started in state: (3, 2)
Took action: exit
Ended in state: TERMINAL_STATE
Got reward: 1
current state - environment.getCurrentState()
EPISODE 1 COMPLETE: RETURN WAS 0.4782969
AVERAGE RETURNS FROM START STATE: 0.4782969
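(Note: 0.4782969 = 0.9**7, i.e. the exit reward of 1 discounted once per step over the seven moves taken before the exit action, assuming the default discount of 0.9.)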
python autograder.py -q q1
output:
Starting on 4-25 at 14:33:52
Question q1
===========
*** FAIL: Exception raised: -1
***
*** Traceback (most recent call last):
*** File "/home/karen/dev/ai/reinforcement/grading.py", line 71, in grade
*** util.TimeoutFunction(getattr(gradingModule, q),300)(self) # Call the question's function
*** File "/home/karen/dev/ai/reinforcement/util.py", line 608, in __call__
*** result = self.function(*args, **keyArgs)
*** File "autograder.py", line 268, in <lambda>
*** return lambda grades: question.execute(grades)
*** File "/home/karen/dev/ai/reinforcement/testClasses.py", line 49, in execute
*** if not f(grades):
*** File "autograder.py", line 263, in <lambda>
*** return lambda grades: testCase.execute(grades, moduleDict, solutionDict)
*** File "reinforcementTestClasses.py", line 55, in execute
*** testPass, stdOutString, fileOutString = self.executeNIterations(grades, moduleDict, solutionDict, n, checkPolicy)
*** File "reinforcementTestClasses.py", line 68, in executeNIterations
*** valuesPretty, qValuesPretty, actions, policyPretty = self.runAgent(moduleDict, n)
*** File "reinforcementTestClasses.py", line 127, in runAgent
*** policy[state] = agent.computeActionFromValues(state)
*** File "valueIterationAgents.py", line 145, in computeActionFromValues
*** bestAction,stateValue = ValueDict[self.iterations - 1]
*** KeyError: -1
***
### Question q1: 0/6 ###
Finished at 14:33:52
Provisional grades
==================
Question q1: 0/6
------------------
Total: 0/6
Your grades are NOT yet registered. To register your grades you must
submit your files to the edX website. The grades obtained through the
edX website are your final grades unless your submission was not in
the spirit of the course, such as if your submission simply hardcoded
the answers to the tests. We will screen for this after the deadline.
*If you worked with a partner, you must both submit separately.*
python gridworld.py -a value -i 100 -k 10
error output:
karen@scary:~/dev/ai/reinforcement> python gridworld.py -a value -i 100 -k 10 > gridword100iters10episodes.out
Traceback (most recent call last):
File "gridworld.py", line 593, in <module>
returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback, messageCallback, pauseCallback, episode)
File "gridworld.py", line 363, in runEpisode
nextState, reward = environment.doAction(action)
File "/home/karen/dev/ai/reinforcement/gridworld.py", line 189, in doAction
(nextState, reward) = self.getRandomNextState(state, action)
File "/home/karen/dev/ai/reinforcement/gridworld.py", line 200, in getRandomNextState
successors = self.gridWorld.getTransitionStatesAndProbs(state, action)
File "/home/karen/dev/ai/reinforcement/gridworld.py", line 119, in getTransitionStatesAndProbs
raise "Illegal action!"
TypeError: exceptions must be old-style classes or derived from BaseException, not str
karen@scary:~/dev/ai/reinforcement>
'''
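The final TypeError is Python 2.6+ rejecting the string exception `raise "Illegal action!"` in the project's gridworld.py; the underlying message is that the agent returned an action that is illegal in the current state (plausibly the hardcoded 'north' default noted above). A minimal sketch of that raise statement rewritten as a proper exception object, assuming nothing else about gridworld.py:

# Sketch: replace the string exception at gridworld.py line 119.
# Old (fails on Python 2.6+):
#     raise "Illegal action!"
# New: raise an exception object so the message actually surfaces.
raise Exception("Illegal action: %r in state %r" % (action, state))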
@KarenWest (Author) commented:
Rest of the 100-iteration, 10-episode output:

manual mode??
False

RUNNING 10 EPISODES

BEGINNING EPISODE: 1

current state - environment.getCurrentState()
Started in state: (0, 0)
Took action: north
Ended in state: (1, 0)
Got reward: 0.0

current state - environment.getCurrentState()
Started in state: (1, 0)
Took action: north
Ended in state: (1, 0)
Got reward: 0.0

current state - environment.getCurrentState()
Started in state: (1, 0)
Took action: east
Ended in state: (1, 0)
Got reward: 0.0

current state - environment.getCurrentState()
Started in state: (1, 0)
Took action: east
Ended in state: (2, 0)
Got reward: 0.0

current state - environment.getCurrentState()
Started in state: (2, 0)
Took action: north
Ended in state: (3, 0)
Got reward: 0.0

current state - environment.getCurrentState()
Started in state: (3, 0)
Took action: west
Ended in state: (2, 0)
Got reward: 0.0

current state - environment.getCurrentState()
Started in state: (2, 0)
Took action: north
Ended in state: (2, 1)
Got reward: 0.0

current state - environment.getCurrentState()
Started in state: (2, 1)
Took action: north
Ended in state: (3, 1)
Got reward: 0.0

current state - environment.getCurrentState()

(Note: Previously this ran to completion with no crash; the answer was 0.484071118658 for returns.)
