Tsunemichi Harada tsu-nera

## CEM.py
import numpy as np
import gym
from gym.spaces import Discrete,Box

# -------------------------------------------
# Policies
# -------------------------------------------

class DeterministicDiscreteActionLinearPolicy(object):

## midi_to_txt.rb

require 'midilib'

seq = MIDI::Sequence.new()
File.open(ARGV[0], 'rb') { |file| seq.read(file) }

events = [ ]
id = 0

seq.tracks.each do |track|

## cem.py
import gym
import numpy as np
import matplotlib.pyplot as plt
env = gym.make('CartPole-v0')
env.render(close=True)
#vector of means (mu) and standard deviations (sigma) for each parameter
mu=np.random.uniform(size=state.shape)
sigma=np.random.uniform(low=0.001,size=state.shape)


## CEMgym.py
#Most code from John Schulman's MLSS talk on Deep Reinforcement Learning
#http://rl-gym-doc.s3-website-us-west-2.amazonaws.com/mlss/lab1.html#szitalorincz06

import numpy as np
import gym
from gym.spaces import Discrete, Box

# ================================================================
# Policies
# ================================================================

## docsearch-pageindexer.py
import os
import sys
import yaml
import json

# base url to use
base_url = "http://localhost:1313"

# The attribute mapping for docsearch.
#

## cem.md

      
              2 files
            
          
              9 forks
            
          
              0 comments
            
          
              46 stars
            
          
                kashif
                / cem.md
            
            
              Last active
              November 7, 2023 12:56
            
              
                Cross Entropy Method
              
          
    Cross Entropy Method

How do we solve the policy optimization problem, that is, how do we maximize the total reward given some parametrized policy?
Discounted future reward

To begin with, for an episode the total reward is the sum of all the rewards. If our environment is stochastic, we can never be sure that we will get the same rewards the next time we perform the same actions. Thus, the further we go into the future, the more the total future reward may diverge. For that reason it is common to use the discounted future reward, $R_t = r_t + \text{discount} \cdot r_{t+1} + \text{discount}^2 \cdot r_{t+2} + \dots$, where the parameter discount is called the discount factor and is between 0 and 1.
A good strategy for an agent would be to always choose an action that maximizes the (discounted) future reward. In other words we want to maximize the expected reward per episode.

  
## install_ruby
#!/bin/bash

set -eou pipefail

version=8.6.4.1
patchinfo=299124-linux-x86_64-threaded
dir=ActiveTcl$version.$patchinfo
package=$dir.tar.gz
url=http://downloads.activestate.com/ActiveTcl/releases/$version/$package

## cartpole.py

import gym
from tqdm import tqdm_notebook
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

def get_random_policy():
    """Sample a random policy table.

    The result is an array of shape ``tuple(bins)`` whose entries are drawn
    independently via ``np.random.choice(n_actions, ...)``.  Both ``bins``
    and ``n_actions`` are module-level names defined outside this function
    (presumably the per-dimension state bin counts and the number of
    discrete actions -- confirm against the rest of the script).
    """
    table_shape = tuple(bins)
    return np.random.choice(n_actions, size=table_shape)

## sample-data-bash-helpers.sh
#
# add to bashrc
#

# files
alias sampletree='mkdir -p sample/{train,test,valid}'
# lsn N DIR
# Print N randomly chosen regular files found under `pwd`/DIR, skipping any
# path that contains "sample". awk splits each path on the current directory,
# so the output is the path relative to it, prefixed with ".".
lsn() {
    matchdir=$(pwd)/$2
    find $matchdir -type f |
        grep -v sample |
        shuf -n $1 |
        awk -F$(pwd) '{print "."$NF}'
}
# shuffle mv/cp
# cpn N DIR
# Copy N randomly chosen regular files found under `pwd`/DIR (skipping any
# path that contains "sample") to the same relative path prefixed with
# "sample". awk emits "<.relative-path> <sample + relative-path>" pairs and
# xargs runs one `cp` per pair, echoing each command first (-t).
cpn() {
    matchdir=$(pwd)/$2
    find $matchdir -type f |
        grep -v sample |
        shuf -n $1 |
        awk -F$(pwd) '{print "."$NF" sample"$NF}' |
        xargs -t -n2 cp
}
# mvn N SRC_DIR DEST_DIR
# Move N randomly chosen regular files found under `pwd`/SRC_DIR (skipping
# any path that contains "sample") into `pwd`/DEST_DIR. awk emits
# "<full-path> <destination-dir>" pairs and xargs runs one `mv` per pair,
# echoing each command first (-t).
mvn() {
    matchdir=$(pwd)/$2
    todir=$(pwd)/$3
    find $matchdir -type f |
        grep -v sample |
        shuf -n $1 |
        awk -F$(pwd) -v todir="$todir" '{print $0" "todir}' |
        xargs -t -n2 mv
}

## racetrack_sarsa.ipynb

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              2 stars
            
          
                pat-coady
                / racetrack_sarsa.ipynb
            
            
              Last active
              April 21, 2024 21:01
            
              
                Sutton and Barto Racetrack: Sarsa
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	import numpy as np
	import gym
	from gym.spaces import Discrete,Box

	# -------------------------------------------
	# Policies
	# -------------------------------------------

	class DeterministicDiscreteActionLinearPolicy(object):

	require 'midilib'

	seq = MIDI::Sequence.new()
	File.open(ARGV[0], 'rb') { \|file\| seq.read(file) }

	events = [ ]
	id = 0

	seq.tracks.each do \|track\|
	import gym
	import numpy as np
	import matplotlib.pyplot as plt
	env = gym.make('CartPole-v0')
	env.render(close=True)
	#vector of means(mu) and standard dev(sigma) for each paramater
	mu=np.random.uniform(size=state.shape)
	sigma=np.random.uniform(low=0.001,size=state.shape)
	#Most code from John Schulman's MLSS talk on Deep Reinforcement Learning
	#http://rl-gym-doc.s3-website-us-west-2.amazonaws.com/mlss/lab1.html#szitalorincz06

	import numpy as np
	import gym
	from gym.spaces import Discrete, Box

	# ================================================================
	# Policies
	# ================================================================
	import os
	import sys
	import yaml
	import json

	# base url to use
	base_url = "http://localhost:1313"

	# The attribute mapping for docsearch.
	#
	#!/bin/bash

	set -eou pipefail

	version=8.6.4.1
	patchinfo=299124-linux-x86_64-threaded
	dir=ActiveTcl$version.$patchinfo
	package=$dir.tar.gz
	url=http://downloads.activestate.com/ActiveTcl/releases/$version/$package

	import gym
	from tqdm import tqdm_notebook
	import numpy as np
	import matplotlib.pyplot as plt
	import warnings
	warnings.filterwarnings('ignore')

	def get_random_policy():
	return np.random.choice(n_actions, tuple(bins))
	#
	# add to bashrc
	#

	# files
	alias sampletree='mkdir -p sample/{train,test,valid}'
	lsn(){ matchdir=`pwd`/$2; find $matchdir -type f \| grep -v sample \| shuf -n $1 \| awk -F`pwd` '{print "."$NF}' ; }
	# shuffle mv/cp
	cpn(){ matchdir=`pwd`/$2; find $matchdir -type f \| grep -v sample \| shuf -n $1 \| awk -F`pwd` '{print "."$NF" sample"$NF}' \| xargs -t -n2 cp ; }
	mvn(){ matchdir=`pwd`/$2; todir=`pwd`/$3; find $matchdir -type f \| grep -v sample \| shuf -n $1 \| awk -F`pwd` -v todir="$todir" '{print $0" "todir}' \| xargs -t -n2 mv ; }