Kevin Frans (kvfrans)
@kvfrans
kvfrans / evo.py
Created March 12, 2021 08:39
Code for "Selection for Selection".
import numpy as np
import random
import json
import scipy.stats as ss
import multiprocessing as mp
import time
import sys
import torch
import torch.nn as nn
@kvfrans
kvfrans / q_experience_replay.py
Created July 29, 2016 23:46
Q-learn w/ function approximator and experience replay
# This is an improved version of Q_Basic: it adds experience replay, storing
# previous transitions and training on them again. To fix non-convergence
# problems, I manually assign a reward of -200 when the agent fails to reach
# 200 timesteps, and I run 10 supervised training updates after each episode.
import numpy as np
import argparse
import tensorflow as tf
import time
import random
import gym
import math
import matplotlib.pyplot as plt
def softmax(x):
    e_x = np.exp(x - np.max(x))  # subtract the max for numerical stability
    return e_x / e_x.sum()
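The preview stops at softmax, so the replay machinery itself is not shown. Below is a minimal sketch of the mechanism the header comment describes, using a plain numpy linear Q-function in place of the gist's TensorFlow network; the ReplayBuffer class, its capacity, the batch size, learning rate, and discount factor are all illustrative assumptions, not values from the gist.

import random
import numpy as np
from collections import deque

class ReplayBuffer:
    # Hypothetical buffer: remembers transitions so they can be re-sampled
    # for the extra supervised updates mentioned in the comment above.
    def __init__(self, capacity=10000):
        self.buffer = deque(maxlen=capacity)
    def add(self, transition):
        self.buffer.append(transition)  # (state, action, reward, next_state, done)
    def sample(self, batch_size):
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))

def q_update(weights, batch, lr=0.01, gamma=0.99):
    # One supervised step on a replayed batch, for a linear Q(s, a) = (s @ W)[a].
    for s, a, r, s2, done in batch:
        target = r if done else r + gamma * np.max(s2 @ weights)
        td_error = target - (s @ weights)[a]
        weights[:, a] += lr * td_error * s
    return weights

# After each episode, run the gist's 10 supervised updates on replayed data:
# for _ in range(10):
#     weights = q_update(weights, buffer.sample(64))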
@kvfrans
kvfrans / cartpole-hill.py
Created June 30, 2016 05:03
cartpole solver by greedily adding noise to linear function
import gym
import numpy as np
def run_episode(env, parameters):
    observation = env.reset()
    totalreward = 0
    counter = 0
    for _ in range(200):
        # env.render()  # rendering disabled for speed
        action = 0 if np.matmul(parameters, observation) < 0 else 1
        observation, reward, done, info = env.step(action)
        totalreward += reward
        counter += 1
        if done:
            break
    return totalreward
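The preview cuts off before the search loop. Continuing the snippet above (which already imports gym and numpy), here is a minimal sketch of the hill-climbing driver the description implies: keep the best linear weights found so far, perturb them with scaled noise, and adopt the perturbation only if it scores better. The noise scale of 0.1 and the trial count are assumptions, not values from the gist.

env = gym.make('CartPole-v0')
noise_scaling = 0.1  # assumed perturbation scale
parameters = np.random.rand(4) * 2 - 1  # random weights in [-1, 1]
bestreward = 0
for _ in range(10000):
    # Greedily add noise: perturb the current best parameters and re-evaluate.
    newparams = parameters + (np.random.rand(4) * 2 - 1) * noise_scaling
    reward = run_episode(env, newparams)
    if reward > bestreward:
        bestreward = reward
        parameters = newparams
        if reward == 200:  # CartPole-v0 caps episodes at 200 steps
            break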
@kvfrans
kvfrans / cartpole.py
Created June 30, 2016 01:02
CartPole using random weights
import gym
import numpy as np
def run_episode(env, parameters):
    observation = env.reset()
    totalreward = 0
    while True:
        env.render()
        action = 0 if np.matmul(parameters, observation) < 0 else 1
        observation, reward, done, info = env.step(action)
        totalreward += reward
        if done:
            break
    return totalreward
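The preview ends mid-function; after the completed helper above, a sketch of the random-search driver the title describes follows: sample fresh random weights each trial and keep whichever scores best. The trial count of 10000 is an assumption.

env = gym.make('CartPole-v0')
bestparams = None
bestreward = 0
for _ in range(10000):
    parameters = np.random.rand(4) * 2 - 1  # fresh random weights in [-1, 1]
    reward = run_episode(env, parameters)
    if reward > bestreward:
        bestreward = reward
        bestparams = parameters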