Kevin Frans (kvfrans)
@kvfrans
kvfrans / evo.py
Created March 12, 2021 08:39
Code for "Selection for Selection".
import numpy as np
import random
import json
import scipy.stats as ss
import multiprocessing as mp
import time
import sys
import torch
import torch.nn as nn
@kvfrans
kvfrans / q_experience_replay.py
Created July 29, 2016 23:46
Q-learn w/ function approximator and experience replay
# This is an improved version of Q_Basic: it adds experience replay, storing
# previous transitions and training on them again. To fix non-convergence
# problems, I manually assign a reward of -200 when the agent fails to reach
# 200 timesteps, and I run 10 supervised training updates after each episode.
import numpy as np
import argparse
import tensorflow as tf
import time
import random
import gym
import math
import matplotlib.pyplot as plt
def softmax(x):
    e_x = np.exp(x - np.max(x))  # subtract the max for numerical stability
    return e_x / e_x.sum()
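The preview stops at softmax, so the replay machinery itself is not shown. Below is a minimal sketch of the mechanism the header comment describes, using a plain numpy linear Q-function in place of the gist's TensorFlow network; the ReplayBuffer class, its capacity, the batch size, learning rate, and discount factor are all illustrative assumptions, not values from the gist.

import random
import numpy as np
from collections import deque

class ReplayBuffer:
    # Hypothetical buffer: remembers transitions so they can be re-sampled
    # for the extra supervised updates mentioned in the comment above.
    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)
    def add(self, transition):
        self.buffer.append(transition)  # (state, action, reward, next_state, done)
    def sample(self, batch_size):
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))

def q_update(weights, batch, lr=0.01, gamma=0.99):
    # One supervised step on a replayed batch, for a linear Q(s, a) = (s @ W)[a].
    for s, a, r, s2, done in batch:
        target = r if done else r + gamma * np.max(s2 @ weights)
        td_error = target - (s @ weights)[a]
        weights[:, a] += lr * td_error * s
    return weights

# After each episode, run the gist's 10 supervised updates on replayed data:
# for _ in range(10):
#     weights = q_update(weights, buffer.sample(64))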
@kvfrans
kvfrans / cartpole-hill.py
Created June 30, 2016 05:03
cartpole solver by greedily adding noise to linear function
import gym
import numpy as np
def run_episode(env, parameters):
    observation = env.reset()
    totalreward = 0
    counter = 0
    for _ in range(200):
        # env.render()  # rendering disabled for speed
        action = 0 if np.matmul(parameters, observation) < 0 else 1
        observation, reward, done, info = env.step(action)
        totalreward += reward
        counter += 1
        if done:
            break
    return totalreward
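The preview cuts off before the search loop. Continuing the snippet above (which already imports gym and numpy), here is a minimal sketch of the hill-climbing driver the description implies: keep the best linear weights found so far, perturb them with scaled noise, and adopt the perturbation only if it scores better. The noise scale of 0.1 and the trial count are assumptions, not values from the gist.

env = gym.make('CartPole-v0')
noise_scaling = 0.1  # assumed perturbation scale
parameters = np.random.rand(4) * 2 - 1  # random weights in [-1, 1]
bestreward = 0
for _ in range(10000):
    # Greedily add noise: perturb the current best parameters and re-evaluate.
    newparams = parameters + (np.random.rand(4) * 2 - 1) * noise_scaling
    reward = run_episode(env, newparams)
    if reward > bestreward:
        bestreward = reward
        parameters = newparams
        if reward == 200:  # CartPole-v0 caps episodes at 200 steps
            break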
@kvfrans
kvfrans / cartpole.py
Created June 30, 2016 01:02
CartPole using random weights
import gym
import numpy as np
def run_episode(env, parameters):
    observation = env.reset()
    totalreward = 0
    while True:
        env.render()
        action = 0 if np.matmul(parameters, observation) < 0 else 1
        observation, reward, done, info = env.step(action)
        totalreward += reward
        if done:
            break
    return totalreward
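The preview ends mid-function; after the completed helper above, a sketch of the random-search driver the title describes follows: sample fresh random weights each trial and keep whichever scores best. The trial count of 10000 is an assumption.

env = gym.make('CartPole-v0')
bestparams = None
bestreward = 0
for _ in range(10000):
    parameters = np.random.rand(4) * 2 - 1  # fresh random weights in [-1, 1]
    reward = run_episode(env, parameters)
    if reward > bestreward:
        bestreward = reward
        bestparams = parameters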