I hereby claim:
- I am fohria on github.
- I am foh (https://keybase.io/foh) on keybase.
- I have a public key ASBayWEcSsTN-vp0LvAW4683N5MaA_nWHMqQ8z6v-9ksaAo
To claim this, I am signing this object:
import theano | |
import theano.tensor as tt | |
import numpy as np | |
import pymc3 as pm | |
def update_qvalsQL(action, reward, qvals, alpha, tau, gamma): | |
probs = tt.nnet.softmax(qvals * tau) | |
probs = probs[0] # because softmax returns array inside array |
""" just run this until you get a reasonable average for the sequence | |
for the good arm we use rewards of 2-10, so the average should be 6;
for the bad arm we use rewards of 1-8, so the average should be 4.5 """
import numpy as np | |
# How far from the ideal average a reward sequence is allowed to be.
_PRECISION = 0.05
# Length of the reward sequence.
_SEQ_LENGTH = 50
# Lowest and highest reward for the bad arm (the docstring above gives the
# bad-arm range as 1-8). Stray trailing "|" extraction residue was removed
# from these two assignments so that they parse.
_BAD_ARM_LOW = 1
_BAD_ARM_HIGH = 8
# Fixed reward sequences, 100 values per arm, laid out ten values per row.
# Stray "|" extraction residue inside and after the list literals was removed
# so that both assignments parse; the values themselves are unchanged.

# Average of the first 50 values is 6.04; average of values 51-100 is 4.5.
green_rewards = [
    6, 8, 4, 3, 9, 5, 3, 6, 6, 5,
    3, 4, 8, 4, 9, 2, 7, 5, 8, 3,
    4, 5, 5, 3, 9, 3, 7, 8, 8, 9,
    7, 6, 9, 4, 8, 9, 9, 8, 8, 5,
    8, 3, 7, 7, 7, 3, 2, 8, 8, 7,
    1, 4, 1, 5, 2, 6, 7, 6, 4, 7,
    7, 7, 5, 7, 4, 4, 4, 2, 1, 7,
    6, 5, 6, 6, 6, 6, 5, 6, 1, 3,
    2, 5, 7, 7, 1, 4, 3, 3, 6, 2,
    6, 4, 4, 5, 7, 7, 7, 2, 1, 1,
]

# Average of the first 50 values is 4.46; average of values 51-100 is 6.02.
orange_rewards = [
    4, 5, 4, 4, 4, 1, 2, 6, 5, 7,
    7, 2, 5, 6, 4, 5, 6, 5, 5, 1,
    4, 1, 3, 6, 2, 7, 3, 7, 5, 1,
    6, 6, 1, 6, 6, 1, 3, 4, 7, 5,
    2, 6, 6, 4, 7, 6, 7, 7, 2, 4,
    7, 4, 9, 9, 9, 2, 3, 9, 8, 7,
    5, 6, 4, 5, 6, 7, 7, 4, 9, 6,
    6, 3, 4, 7, 7, 9, 7, 5, 4, 6,
    5, 8, 8, 3, 2, 6, 3, 7, 5, 2,
    8, 8, 9, 3, 4, 7, 7, 7, 6, 9,
]
import numpy as np | |
import pandas as pd | |
from scipy.optimize import minimize | |
import sys | |
from multiprocessing import Pool | |
# local modules and functions imported here | |
sys.path.append('../experiment/simulation') | |
from utils import softmax, autocorrelation | |
class MaxLike(object): |
I hereby claim:
To claim this, I am signing this object: