Skip to content

Instantly share code, notes, and snippets.

class SimpleSupplyChain(gym.Env):
    """Gym environment wrapper around ``SupplyChainEnvironment``.

    Only construction and ``reset`` are defined in this excerpt.
    """

    def __init__(self, config):
        """Create the simulator and declare the action/observation spaces.

        Args:
            config: Environment configuration passed by the caller; it is not
                read here.
        """
        # reset() creates self.supply_chain and returns the first observation,
        # which is used below to size the observation space.
        initial_observation = self.reset()

        # With scalar bounds and no shape, Box either raises or falls back to
        # a 1-element space depending on the gym version, so the shapes are
        # spelled out explicitly.
        # NOTE(review): assumes an action holds one production level plus one
        # shipment per warehouse -- confirm against Action.
        action_dim = self.supply_chain.warehouse_num + 1
        self.action_space = Box(low=0.0, high=20.0, shape=(action_dim,))  # Continuous space
        # NOTE(review): assumes State.to_array() returns a flat sequence.
        self.observation_space = Box(
            low=-10000, high=10000, shape=(len(initial_observation),)
        )

    def reset(self):
        """Start a new episode.

        Returns:
            The initial state converted with ``to_array()``.
        """
        self.supply_chain = SupplyChainEnvironment()
        self.state = self.supply_chain.initial_state()
        return self.state.to_array()
Optimized policy parameters:
Factory (s, Q) = (0, 20)
Warehouse 1 (s, Q) = (5, 5)
Warehouse 2 (s, Q) = (5, 5)
Warehouse 3 (s, Q) = (5, 10)
Achieved profit: 6871.0
from ax import optimize
def evaluate_sQPolicy(p):
    """Score one (s, Q) parameterization by simulation.

    Args:
        p: Mapping that provides the keys ``factory_s``, ``factory_Q``,
            ``w1_s``/``w2_s``/``w3_s`` and ``w1_Q``/``w2_Q``/``w3_Q``.

    Returns:
        The mean (``np.mean``) of whatever ``simulate`` returns for 30
        episodes run with the resulting policy.
    """
    policy = SQPolicy(
        p['factory_s'],
        p['factory_Q'],
        [p['w1_s'], p['w2_s'], p['w3_s']],
        [p['w1_Q'], p['w2_Q'], p['w3_Q']],
    )
    # `env` is not a parameter: it is looked up in the enclosing module scope.
    return np.mean(simulate(env, policy, num_episodes=30))
class SQPolicy(object):
    """Holds the (s, Q) parameters for the factory and for the warehouses."""

    def __init__(self, factory_safety_stock,
                 factory_reorder_amount, safety_stock, reorder_amount):
        """Store the policy parameters unchanged.

        Args:
            factory_safety_stock: The factory's ``s`` value.
            factory_reorder_amount: The factory's ``Q`` value.
            safety_stock: Warehouse ``s`` values (``evaluate_sQPolicy`` passes
                a list with one entry per warehouse).
            reorder_amount: Warehouse ``Q`` values, passed the same way.
        """
        self.factory_safety_stock = factory_safety_stock
        self.factory_reorder_amount = factory_reorder_amount
        self.safety_stock = safety_stock
        self.reorder_amount = reorder_amount

    def select_action(self, state):
        """Begin building an action for ``state``.

        NOTE(review): the excerpt ends right after the ``Action`` is created;
        as shown, nothing is filled in and the method returns ``None``.
        """
        action = Action(state.warehouse_num)
class SupplyChainEnvironment(object):
    # Excerpt: other members of the class are elided.
    ...

    def step(self, state, action):
        """Compute the reward components for one time step.

        NOTE(review): the excerpt stops after the storage cost; no state
        transition or return value is visible here.

        Args:
            state: Object providing ``stock_levels()``.
            action: Object providing ``production_level``.
        """
        # np.fromfunction calls the lambda once with the whole index array
        # 0..warehouse_num-1, so demand() receives warehouse numbers
        # 1..warehouse_num in a single vectorized call.
        demands = np.fromfunction(lambda j: self.demand(j+1, self.t), (self.warehouse_num,))
        # Calculating the reward (profit)
        total_revenue = self.unit_price * np.sum(demands)
        total_production_cost = self.unit_cost * action.production_level
        # Negative stock levels are clamped to zero first, so only stock
        # actually held contributes to the storage cost.
        # NOTE(review): the warehouse_num + 1 entries are presumably the
        # factory plus each warehouse -- confirm against stock_levels().
        total_storage_cost = np.dot(
            self.storage_costs,
            np.maximum(state.stock_levels(), np.zeros(self.warehouse_num + 1)),
        )
class SupplyChainEnvironment(object):
    # Excerpt: other members of the class are elided.
    ...

    def demand(self, j, t):  # Demand at warehouse j at time t
        """Return the rounded demand for warehouse ``j`` at time ``t``.

        The demand is a sinusoid with mean and amplitude ``d_max / 2`` and
        argument ``4 * pi * (t + 2 * j) / T``, plus integer noise drawn
        uniformly from ``[0, d_var)``, rounded with ``np.round``.

        Args:
            j: Warehouse number, either a scalar or an array of numbers.
            t: Time step.

        Returns:
            A scalar for scalar inputs, otherwise an array of the broadcast
            shape.
        """
        seasonal = (self.d_max/2 +
                    self.d_max/2*np.sin(2*np.pi*(t + 2*j)/self.T*2))
        # step() passes every warehouse index in one array (np.fromfunction).
        # A single scalar draw would give all warehouses identical noise, so
        # draw one value per element. Scalar inputs have shape (), which maps
        # to size=None and keeps the original single scalar draw.
        noise = np.random.randint(0, self.d_var, size=np.shape(seasonal) or None)
        return np.round(seasonal + noise)
class SupplyChainEnvironment(object):
    """Constants of the supply-chain simulation (constructor excerpt)."""

    def __init__(self):
        """Set the fixed simulation parameters.

        NOTE(review): ``step`` also reads ``self.t`` and
        ``self.storage_costs``, which are not initialised in this excerpt.
        """
        self.T = 26  # Episode duration
        self.warehouse_num = 3
        self.d_max = 5  # Maximum demand, units
        self.d_var = 2  # Maximum random demand variation, units
        self.unit_price = 100  # Unit price in dollars
        self.unit_cost = 40  # Unit cost in dollars
class State(object):
    """Snapshot of stock levels, demand history and time within an episode."""

    def __init__(self, warehouse_num, T, demand_history, t=0):
        """Create a state with empty factory and warehouse stock.

        Args:
            warehouse_num: Number of warehouses; sets the length of
                ``warehouse_stock``.
            T: Length of one episode.
            demand_history: Stored as given (not copied).
            t: Current time step, 0 by default.
        """
        self.warehouse_num = warehouse_num
        self.factory_stock = 0
        # One zero entry per warehouse.
        self.warehouse_stock = np.repeat(0, warehouse_num)
        self.demand_history = demand_history
        self.T = T  # Length of one episode
        self.t = t
def to_array(self):
import ray
import ray.rllib.agents.dqn as dqn
def train_dqn():
    """Build an RLlib DQN trainer for ``HiLoPricingEnv``.

    Starts from a copy of ``dqn.DEFAULT_CONFIG`` and overrides the log level,
    train batch size, buffer size and hidden-layer sizes.

    NOTE(review): the excerpt ends once the trainer is constructed; no
    training loop or return value is visible here.
    """
    config = dqn.DEFAULT_CONFIG.copy()
    config["log_level"] = "WARN"
    config["train_batch_size"] = 256
    config["buffer_size"] = 10000
    config["hiddens"] = [128, 128, 128]
    trainer = dqn.DQNTrainer(config=config, env=HiLoPricingEnv)
import gym
from gym.spaces import Discrete, Box
class HiLoPricingEnv(gym.Env):
    """Gym environment for the Hi-Lo pricing example (constructor excerpt)."""

    def __init__(self, config):
        """Reset the environment and declare its spaces.

        Args:
            config: Environment configuration passed by the caller; it is not
                read here.
        """
        self.reset()
        # One discrete action per entry of the module-level price_grid.
        self.action_space = Discrete(len(price_grid))
        # Observations are 2*T float32 values in [0, 10000]; T is a
        # module-level name.
        self.observation_space = Box(0, 10000, shape=(2*T, ), dtype=np.float32)
def reset(self):