Skip to content

Instantly share code, notes, and snippets.

# -*- coding: utf-8 -*-
import tensorflow as tf
import threading
import numpy as np
import signal
import random
import math
import os
import time
import numpy as np
from numpy.linalg import inv
# Two equally long observation series, paired row-wise into `z`:
# z[i] == (x_observations[i], v_observations[i]).
# NOTE(review): presumably x is position and v is velocity -- confirm units.
x_observations = np.array((4000, 4260, 4550, 4860, 5110))
v_observations = np.array((280, 282, 285, 286, 290))
z = np.column_stack((x_observations, v_observations))

# Initial Conditions
a = 2  # Acceleration
import gym
from gym import wrappers
import numpy as np
# Create the FrozenLake environment and wrap it in gym's Monitor wrapper,
# pointed at the ./results directory.
# NOTE(review): "FrozenLake-v0" and wrappers.Monitor look like older-gym
# APIs -- confirm the gym version this script is pinned to.
env = wrappers.Monitor(gym.make("FrozenLake-v0"), "./results", force=True)

# Two zero-initialised (n_states, n_actions) tables.
# NOTE(review): n_s_a is presumably a per-(state, action) visit counter -- confirm.
_table_shape = (env.observation_space.n, env.action_space.n)
Q = np.zeros(_table_shape)
n_s_a = np.zeros(_table_shape)
import time

import gym
import numpy
# Function for a random policy
def randomPolicy(n_states=16, n_actions=4):
    """Return a randomly drawn policy as a 1-D integer array.

    Each entry is sampled independently with ``numpy.random.choice`` from
    ``range(n_actions)``. The defaults (16 states, 4 actions) reproduce the
    sizes that were previously hard-coded, so existing ``randomPolicy()``
    calls behave as before.

    Args:
        n_states: Length of the returned policy array (one entry per state).
        n_actions: Number of distinct action indices to sample from.

    Returns:
        numpy.ndarray of shape ``(n_states,)`` whose values lie in
        ``[0, n_actions)``.
    """
    return numpy.random.choice(n_actions, size=n_states)
#Execution
def execute(env, policy, episode_len=100, render=False):
reward = 0
obs = env.reset()