chinmay8bit

## filling_T_and_R.py
def fill_in_transition(self):
    """
    Compute the transition matrix of the grid.

    Exercise template: the visible code only allocates T as an all-zero
    array; the logic that fills in its entries is meant to be written below
    the "Add your code here" marker.

    input: /
    output: T {np.array} -- the transition matrix of the grid
      NOTE(review): no return statement is visible in this excerpt --
      confirm that the completed function actually returns T.
    """
    # Zero-initialised array of shape (state_size, state_size, action_size).
    T = np.zeros((self.state_size, self.state_size, self.action_size)) # Empty matrix of dimension S*S*A

    ####
    # Add your code here

## value_iteration.py
def policy_iteration(self, threshold = 0.0001, gamma = 0.8):
    """
    Policy iteration on GridWorld
    input:
      - threshold {float} -- threshold value used to stop the policy iteration algorithm
      - gamma {float} -- discount factor
    output:
      - policy {np.array} -- policy found using the policy iteration algorithm
      - V {np.array} -- value function corresponding to the policy
      - epochs {int} -- number of epochs to find this policy

## fibonacii_formual.ipynb

      
        
          
            
              
              1 file
            
          
          
            
              
              0 forks
            
          
            
              
                
                0 comments
              
            
          
            
              
              0 stars
            
          
        
        
          
              
          
          
            
                chinmay8bit
                / fibonacii_formual.ipynb
            
            
              Last active
              September 16, 2024 14:23
            
              
                Untitled3.ipynb
              
          
        
      
        
  
    
    

          
    
      
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	def fill_in_transition(self):
	"""
	Compute the transition matrix of the grid
	input: /
	output: T {np.array} -- the transition matrix of the grid
	"""
	T = np.zeros((self.state_size, self.state_size, self.action_size)) # Empty matrix of dimension S*S*A

	####
	# Add your code here
	def policy_iteration(self, threshold = 0.0001, gamma = 0.8):
	"""
	Policy iteration on GridWorld
	input:
	- threshold {float} -- threshold value used to stop the policy iteration algorithm
	- gamma {float} -- discount factor
	output:
	- policy {np.array} -- policy found using the policy iteration algorithm
	- V {np.array} -- value function corresponding to the policy
	- epochs {int} -- number of epochs to find this policy