Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import random | |
import json | |
import scipy.stats as ss | |
import multiprocessing as mp | |
import time | |
import sys | |
import torch | |
import torch.nn as nn |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import argparse | |
import tensorflow as tf | |
import time | |
import random | |
# This is an improved version of Q_Basic. It has experience replay, and remembers previous transitions to train on again. | |
# In order to fix non-convergence problems, I manually put a reward of -200 when failing to reach 200 timesteps, and I | |
# run 10 supervised training updates after each episode. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import numpy as np | |
import random | |
import gym | |
import math | |
import matplotlib.pyplot as plt | |
def softmax(x): | |
e_x = np.exp(x - np.max(x)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym | |
import numpy as np | |
def run_episode(env, parameters): | |
observation = env.reset() | |
totalreward = 0 | |
counter = 0 | |
for _ in xrange(200): | |
# env.render() | |
action = 0 if np.matmul(parameters,observation) < 0 else 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gym | |
import numpy as np | |
def run_episode(env, parameters): | |
observation = env.reset() | |
totalreward = 0 | |
while True: | |
env.render() | |
action = 0 if np.matmul(parameters,observation) < 0 else 1 | |
observation, reward, done, info = env.step(action) |