Skip to content

Instantly share code, notes, and snippets.

View thunderInfy's full-sized avatar
📈

Aditya Rastogi thunderInfy

📈
View GitHub Profile
#Problem Parameters
class jcp:
    """Constants of the car-rental problem, exposed as static accessors.

    NOTE(review): other code in this file also calls ``jcp.moving_reward()``,
    which is not defined in this fragment of the class.
    """

    @staticmethod
    def max_cars():
        """Return the upper bound on the car count at a location (20)."""
        return 20

    @staticmethod
    def γ():
        """Return the constant 0.9 (named γ; presumably the discount factor)."""
        return 0.9
# Holds the rate λ and (per the comment below) a range [α, β] of n values.
# NOTE(review): presumably a Poisson pmf helper; only the start of __init__ is
# visible in this fragment, so the code that uses ε and state is not shown.
class poisson_:
def __init__(self, λ):
self.λ = λ  # rate parameter supplied by the caller
ε = 0.01  # pmf threshold referred to by the comment below
# [α , β] is the range of n's for which the pmf value is above ε
self.α = 0  # lower end of the range starts at 0
state = 1  # NOTE(review): purpose not visible in this fragment - confirm
# NOTE(review): looks like example values for a range [α, β] = [0, 8] and the
# per-n entries of vals - confirm. The dict literal is cut off after key 6 in
# this fragment (no closing brace is visible).
α = 0
β = 8
vals = {0: 0.05127513134990861,
1: 0.15084926808563653,
2: 0.2255298706374324,
3: 0.2255298706374324,
4: 0.1695194187235855,
5: 0.10230687642696924,
6: 0.051897469704506906,
#A class holding the properties of a location together
class location:
    """Bundle the request and return rates of one location.

    Attributes:
        α: value of lambda for requests, as passed in ``req``.
        β: value of lambda for returns, as passed in ``ret``.
        poissonα: ``poisson_`` object built from ``α``.
        poissonβ: ``poisson_`` object built from ``β``.
    """

    def __init__(self, req, ret):
        """Store both rates and build one ``poisson_`` object per rate.

        Args:
            req: value of lambda for requests.
            ret: value of lambda for returns.
        """
        self.α = req  #value of lambda for requests
        self.β = ret  #value of lambda for returns
        self.poissonα = poisson_(self.α)
        self.poissonβ = poisson_(self.β)
#IMPORTS
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import poisson
import sys
#Problem Parameters
# NOTE(review): this `class jcp` header is cut off right after the decorator;
# no method definition is visible in this fragment.
class jcp:
@staticmethod
# Set up the value table and the policy table. Both are square, with one row
# and one column for each car count from 0 to jcp.max_cars(), and both start
# out all-zero; the policy table holds integers.
value = np.zeros((jcp.max_cars() + 1,) * 2)
policy = np.zeros(value.shape, dtype=int)
# Opening of the reward computation for one (state, action) pair.
# NOTE(review): the triple-quoted string below comes after `global value`, so
# it is an ordinary expression statement, not the function's docstring.
# NOTE(review): only the start of the function is visible in this fragment.
def expected_reward(state, action):
global value
"""
state : It's a pair of integers, # of cars at A and at B
action : # of cars transferred from A to B, -5 <= action <= 5
"""
ψ = 0 #reward, starts at zero
# Apply the transfer (A loses `action` cars, B gains them) and clamp each count
# to the range [0, jcp.max_cars()].
new_state = [max(min(state[0] - action, jcp.max_cars()),0) , max(min(state[1] + action, jcp.max_cars()),0)]
# Opening of the policy-improvement pass: visits every (i, j) cell of the
# value table and reads the action currently stored in the policy table.
# NOTE(review): only the loop headers and first assignments are visible in this
# fragment; the action selection and the return value are not shown.
def policy_improvement():
global policy
policy_stable = True  # initial value; any later update is not visible here
for i in range(value.shape[0]):
for j in range(value.shape[1]):
old_action = policy[i][j]  # action currently stored for cell (i, j)
max_act_val = None  # presumably the best action value seen so far - confirm
# Driver loop: run policy_evaluation() then policy_improvement(), save the
# value and policy after every round, and stop once policy_improvement()
# returns a result equal to True.
while True:
    policy_evaluation()
    ρ = policy_improvement()
    save_value()
    save_policy()
    # Kept as an equality test because the return type of
    # policy_improvement() is not visible in this fragment.
    if ρ == True:
        break
# NOTE(review): fragment from the middle of a function; it uses action, ψ and
# new_state, which are assigned outside the lines visible here.
# adding reward for moving cars from one location to another (which is negative)
# action <= 0: all abs(action) moved cars are charged.
if action <= 0:
ψ = ψ + jcp.moving_reward() * abs(action)
# action > 0: one car fewer is charged.
else:
ψ = ψ + jcp.moving_reward() * (action - 1) #one car is moved by one of Jack's employees for free
# adding reward for second parking lot (which is also negative)
# NOTE(review): the body of this check is cut off in this fragment.
if new_state[0] > 10: