Henrik Siljebråt fohria

## markov_summary.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                fohria
                / markov_summary.ipynb
            
            
              Last active
              February 8, 2021 09:54
            
              
                markov stuffs
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## model.py
import theano
import theano.tensor as tt
import numpy as np
import pymc3 as pm

def update_qvalsQL(action, reward, qvals, alpha, tau, gamma):

    probs = tt.nnet.softmax(qvals * tau)
    probs = probs[0]  # because softmax returns array inside array

## reward_generation.py
""" just run this until you get a reasonable average for the sequence
    for good arm we use rewards of 2-10 so average should be 6
    for bad arm we use rewards of 1-8 so average should be 4.5 """

import numpy as np

# Tuning constants for the reward sequence described in the module docstring.
_PRECISION = 0.05 # how far from ideal average our sequence can be
_SEQ_LENGTH = 50 # length of reward sequence
# Reward range for the bad arm; the module docstring gives it as 1-8 with an
# ideal average of 4.5.
# NOTE(review): presumably both bounds are inclusive -- confirm against the
# sampling code, which is not visible in this excerpt.
_BAD_ARM_LOW = 1
_BAD_ARM_HIGH = 8

## rewardfiller.py
# Reward sequences as comma separated values, laid out ten per row so that
# positions are easy to count (rows 1-5 are values 1-50, rows 6-10 are 51-100).

# green: values 1-50 average 6.04, values 51-100 average 4.5
green_rewards = [
    6, 8, 4, 3, 9, 5, 3, 6, 6, 5,
    3, 4, 8, 4, 9, 2, 7, 5, 8, 3,
    4, 5, 5, 3, 9, 3, 7, 8, 8, 9,
    7, 6, 9, 4, 8, 9, 9, 8, 8, 5,
    8, 3, 7, 7, 7, 3, 2, 8, 8, 7,
    1, 4, 1, 5, 2, 6, 7, 6, 4, 7,
    7, 7, 5, 7, 4, 4, 4, 2, 1, 7,
    6, 5, 6, 6, 6, 6, 5, 6, 1, 3,
    2, 5, 7, 7, 1, 4, 3, 3, 6, 2,
    6, 4, 4, 5, 7, 7, 7, 2, 1, 1,
]

# orange: values 1-50 average 4.46, values 51-100 average 6.02
orange_rewards = [
    4, 5, 4, 4, 4, 1, 2, 6, 5, 7,
    7, 2, 5, 6, 4, 5, 6, 5, 5, 1,
    4, 1, 3, 6, 2, 7, 3, 7, 5, 1,
    6, 6, 1, 6, 6, 1, 3, 4, 7, 5,
    2, 6, 6, 4, 7, 6, 7, 7, 2, 4,
    7, 4, 9, 9, 9, 2, 3, 9, 8, 7,
    5, 6, 4, 5, 6, 7, 7, 4, 9, 6,
    6, 3, 4, 7, 7, 9, 7, 5, 4, 6,
    5, 8, 8, 3, 2, 6, 3, 7, 5, 2,
    8, 8, 9, 3, 4, 7, 7, 7, 6, 9,
]

## multiprocessing_pool
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import sys
from multiprocessing import Pool
# local modules and functions imported here
sys.path.append('../experiment/simulation')
from utils import softmax, autocorrelation

class MaxLike(object):

## keybase.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                fohria
                / keybase.md
            
            
              Created
              February 15, 2016 09:36
            
              
                keybase proof
              
          
    Keybase proof

I hereby claim:

I am fohria on github.
I am foh (https://keybase.io/foh) on keybase.
I have a public key ASBayWEcSsTN-vp0LvAW4683N5MaA_nWHMqQ8z6v-9ksaAo

To claim this, I am signing this object:
	import theano
	import theano.tensor as tt
	import numpy as np
	import pymc3 as pm

	def update_qvalsQL(action, reward, qvals, alpha, tau, gamma):

	probs = tt.nnet.softmax(qvals * tau)
	probs = probs[0] # because softmax returns array inside array
	""" just run this until you get a reasonable average for the sequence
	for good arm we use rewards of 2-10 so average should be 6
	for bad arm we use rewards of 1-8 so average should be 4.5 """

	import numpy as np

	_PRECISION = 0.05 # how far from ideal average our sequence can be
	_SEQ_LENGTH = 50 # length of reward sequence
	_BAD_ARM_LOW = 1
	_BAD_ARM_HIGH = 8
	# define our comma separated sequences of reward values
	# average of first 50 is 6.04, average of 51-100 is 4.5
	green_rewards = [6, 8, 4, 3, 9, 5, 3, 6, 6, 5, 3, 4, 8, 4, 9, 2, 7, 5, 8, 3, 4, 5, 5,
	3, 9, 3, 7, 8, 8, 9, 7, 6, 9, 4, 8, 9, 9, 8, 8, 5, 8, 3, 7, 7, 7, 3,
	2, 8, 8, 7, 1, 4, 1, 5, 2, 6, 7, 6, 4, 7, 7, 7, 5, 7, 4, 4, 4, 2, 1, 7, 6, 5, 6, 6, 6, 6, 5, 6, 1, 3, 2, 5, 7, 7, 1, 4, 3, 3, 6, 2, 6, 4, 4, 5, 7, 7, 7, 2, 1, 1]
	# average of first 50 is 4.46, average of 51-100 is 6.02
	orange_rewards = [4, 5, 4, 4, 4, 1, 2, 6, 5, 7, 7, 2, 5, 6, 4, 5, 6, 5, 5, 1, 4, 1, 3, 6, 2, 7, 3, 7, 5, 1, 6, 6, 1, 6, 6, 1, 3, 4, 7, 5, 2, 6, 6, 4, 7, 6, 7, 7, 2, 4, 7, 4, 9, 9, 9, 2, 3, 9, 8, 7, 5, 6, 4, 5, 6, 7, 7, 4, 9, 6, 6, 3, 4,
	7, 7, 9, 7, 5, 4, 6, 5, 8, 8, 3, 2, 6, 3, 7, 5, 2, 8, 8, 9, 3, 4, 7,
	7, 7, 6, 9]
	import numpy as np
	import pandas as pd
	from scipy.optimize import minimize
	import sys
	from multiprocessing import Pool
	# local modules and functions imported here
	sys.path.append('../experiment/simulation')
	from utils import softmax, autocorrelation

	class MaxLike(object):