def riemannint(function, a, b, n):
    # Left-endpoint Riemann sum with n subintervals of width h.
    sumval = 0
    h = (b - a) / n
    for i in range(n):                      # left endpoints a, a+h, ..., a+(n-1)h
        current_x = a + i * h
        sumval = sumval + function(current_x) * h
    return sumval
def riemannint2(function, a, b, n):
    # Truncated in the gist; a plausible completion as the right-endpoint Riemann sum.
    h = (b - a) / n
    return sum(function(a + (i + 1) * h) for i in range(n)) * h
def trapezeint1(function, a, b, n):
    # Composite trapezoid rule: interior points get weight 2, the endpoints weight 1.
    h = (b - a) / n
    sumval = 0
    for i in range(1, n):                   # interior points a+h, ..., a+(n-1)h
        x = a + i * h
        sumval = sumval + 2 * function(x)
    sumval = h * (sumval + function(a) + function(b)) / 2
    return sumval
def trapezeint2(function, a, b, n):
    # Truncated in the gist; a plausible completion as the same trapezoid rule in one expression.
    h = (b - a) / n
    return h * (sum(function(a + i * h) for i in range(1, n)) + (function(a) + function(b)) / 2)
def simpsonint1(function, a, b, n):
    # Composite Simpson's rule; n must be even so [a, b] splits into m = n/2 panel pairs.
    h = (b - a) / n
    m = n // 2
    sumval = 0
    if n % 2 == 0:
        for i in range(1, m):               # even interior points a+2ih, weight 2
            x = a + 2 * i * h
            sumval = sumval + 2 * function(x)
        for i in range(1, m + 1):           # odd points a+(2i-1)h, weight 4
            x = a + (2 * i - 1) * h
            sumval = sumval + 4 * function(x)
        sumval = h * (sumval + function(a) + function(b)) / 3
        return sumval
    raise ValueError("n must be even for Simpson's rule")
def trapezearea(function, a, b):
    # Area of a single trapezoid over [a, b]; used by adaptint below.
    h = b - a
    area = h * (function(a) + function(b)) / 2
    return area
def adaptint(function, a, b, tol=1e-8):
    # Adaptive trapezoid rule: compare one coarse trapezoid over [a, b] against
    # two halves and refine until the two estimates agree to within tol.
    m = (a + b) / 2
    areatot = trapezearea(function, a, b)
    nextareatot = trapezearea(function, a, m) + trapezearea(function, m, b)
    if abs(nextareatot - areatot) < tol:
        return nextareatot
    # the recursion below is a plausible completion of the truncated original
    return adaptint(function, a, m, tol / 2) + adaptint(function, m, b, tol / 2)
import numpy as np

def montecarlo(function, a, b, n):
    # Monte Carlo integration: average the integrand at n uniform samples in [a, b]
    # and scale by the interval length.
    sumval = 0.0
    the_range = np.random.uniform(a, b, n)
    for i in the_range:
        sumval = sumval + function(float(i))
    return (b - a) / n * sumval
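As a quick usage sketch (not part of the original gist), the integrators above can be checked against the integral of x**2 over [0, 1], which equals 1/3:

f = lambda x: x ** 2
print(riemannint(f, 0, 1, 1000))     # ~0.3328, the left sum underestimates an increasing integrand
print(riemannint2(f, 0, 1, 1000))    # ~0.3338
print(trapezeint1(f, 0, 1, 1000))    # ~0.333333
print(simpsonint1(f, 0, 1, 1000))    # ~0.333333
print(adaptint(f, 0, 1))             # ~0.333333
print(montecarlo(f, 0, 1, 100000))   # ~0.333 plus sampling noise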
# a) vectorized baseline: pull every arm on every step and average its rewards
#    (num_ep, num_bandit, num_iter and gt_prob are experiment constants defined
#     elsewhere in the gist; gt_prob is the true success probability of each arm)
a_expect = np.zeros((num_ep, num_bandit))
for eps in range(num_ep):
    temp_expect = np.zeros(num_bandit)
    temp_choice = np.zeros(num_bandit)
    for it in range(num_iter // 10):
        temp_choice = temp_choice + 1
        current_reward = np.random.uniform(0, 1, num_bandit) < gt_prob   # Bernoulli reward per arm
        temp_expect = temp_expect + current_reward    # plausible completion: accumulate rewards
    a_expect[eps] = temp_expect / temp_choice         # plausible completion: per-arm average reward
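The strategy fragments below all rely on those experiment-wide constants. A minimal setup sketch with assumed values (and a pull helper that is not in the gist), so the sketches that follow have something concrete to run against:

import numpy as np

num_ep     = 200                                     # assumed: number of independent episodes
num_bandit = 10                                      # assumed: number of arms
num_iter   = 1000                                    # assumed: pulls per episode
gt_prob    = np.random.uniform(0, 1, num_bandit)     # assumed: true success probability per arm

def pull(arm):
    # helper introduced here, not in the gist: Bernoulli reward for a single arm
    return float(np.random.uniform() < gt_prob[arm])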
# b) greedy: always pull the arm with the highest current value estimate
b_pull_count = np.zeros((num_ep, num_bandit))     # how often each arm was pulled
b_estimation = np.zeros((num_ep, num_bandit))     # final value estimate per arm
b_reward = np.zeros((num_ep, num_iter))           # reward received at each step
b_optimal_pull = np.zeros((num_ep, num_iter))     # 1 when the truly best arm was chosen
b_regret_total = np.zeros((num_ep, num_iter))     # per-step regret against the best arm
for eps in range(num_ep):
    temp_pull_count = np.zeros(num_bandit)
    temp_estimation = np.random.uniform(0, 1, num_bandit)   # random initial estimates
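The greedy loop body is missing from the fragment above; a sketch of what it could look like under the assumed setup, continuing the episode loop with incremental-mean value updates:

    for it in range(num_iter):
        arm = int(np.argmax(temp_estimation))                 # exploit the current best estimate
        reward = pull(arm)
        temp_pull_count[arm] += 1
        temp_estimation[arm] += (reward - temp_estimation[arm]) / temp_pull_count[arm]
        b_reward[eps, it] = reward
        b_optimal_pull[eps, it] = float(arm == np.argmax(gt_prob))
        b_regret_total[eps, it] = np.max(gt_prob) - gt_prob[arm]
    b_pull_count[eps] = temp_pull_count
    b_estimation[eps] = temp_estimation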
# c) epsilon-greedy: with probability epsilon explore a random arm, otherwise act greedily
c_pull_count = np.zeros((num_ep, num_bandit))
c_estimation = np.zeros((num_ep, num_bandit))
c_reward = np.zeros((num_ep, num_iter))
c_optimal_pull = np.zeros((num_ep, num_iter))
c_regret_total = np.zeros((num_ep, num_iter))
for eps in range(num_ep):
    epsilon = np.random.uniform(0, 1)             # exploration rate drawn once per episode
    temp_pull_count = np.zeros(num_bandit)
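As with the greedy case, the loop body is cut off; a sketch continuing the episode above, reusing the same incremental-mean update but splitting each step into explore and exploit branches:

    temp_estimation = np.random.uniform(0, 1, num_bandit)
    for it in range(num_iter):
        if np.random.uniform() < epsilon:
            arm = int(np.random.randint(num_bandit))          # explore: random arm
        else:
            arm = int(np.argmax(temp_estimation))             # exploit: best current estimate
        reward = pull(arm)
        temp_pull_count[arm] += 1
        temp_estimation[arm] += (reward - temp_estimation[arm]) / temp_pull_count[arm]
        c_reward[eps, it] = reward
        c_optimal_pull[eps, it] = float(arm == np.argmax(gt_prob))
        c_regret_total[eps, it] = np.max(gt_prob) - gt_prob[arm]
    c_pull_count[eps] = temp_pull_count
    c_estimation[eps] = temp_estimation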
# d) decaying epsilon-greedy: start fully exploratory and shrink epsilon over time
d_pull_count = np.zeros((num_ep, num_bandit))
d_estimation = np.zeros((num_ep, num_bandit))
d_reward = np.zeros((num_ep, num_iter))
d_optimal_pull = np.zeros((num_ep, num_iter))
d_regret_total = np.zeros((num_ep, num_iter))
for eps in range(num_ep):
    epsilon = 1.0                                 # begin with pure exploration
    temp_pull_count = np.zeros(num_bandit)
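The decay schedule itself is not shown in the gist; a sketch continuing the episode above with a simple multiplicative decay (the 0.999 factor is an assumption):

    temp_estimation = np.random.uniform(0, 1, num_bandit)
    for it in range(num_iter):
        explore = np.random.uniform() < epsilon
        arm = int(np.random.randint(num_bandit)) if explore else int(np.argmax(temp_estimation))
        reward = pull(arm)
        temp_pull_count[arm] += 1
        temp_estimation[arm] += (reward - temp_estimation[arm]) / temp_pull_count[arm]
        d_reward[eps, it] = reward
        d_optimal_pull[eps, it] = float(arm == np.argmax(gt_prob))
        d_regret_total[eps, it] = np.max(gt_prob) - gt_prob[arm]
        epsilon = epsilon * 0.999                 # assumed schedule: less exploration each step
    d_pull_count[eps] = temp_pull_count
    d_estimation[eps] = temp_estimation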
# e) Linear Reward-Inaction: learn a probability vector over arms instead of value estimates
e_pull_count = np.zeros((num_ep, num_bandit))
e_estimation = np.zeros((num_ep, num_bandit))
e_reward = np.zeros((num_ep, num_iter))
e_optimal_pull = np.zeros((num_ep, num_iter))
e_regret_total = np.zeros((num_ep, num_iter))
for eps in range(num_ep):
    learning_rate = 0.1                           # step size for the probability update
    temp_pull_count = np.zeros(num_bandit)
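Linear Reward-Inaction updates the arm probabilities only on success: the chosen arm gains mass and the others shrink proportionally, while a failure leaves the vector unchanged. A sketch continuing the episode above:

    prob = np.ones(num_bandit) / num_bandit               # start from a uniform policy
    for it in range(num_iter):
        arm = int(np.random.choice(num_bandit, p=prob))   # sample an arm from the current policy
        reward = pull(arm)
        temp_pull_count[arm] += 1
        if reward == 1.0:                                 # reward: shift probability toward the arm
            prob = (1.0 - learning_rate) * prob
            prob[arm] += learning_rate
        # inaction: on failure the probability vector stays as it is
        e_reward[eps, it] = reward
        e_optimal_pull[eps, it] = float(arm == np.argmax(gt_prob))
        e_regret_total[eps, it] = np.max(gt_prob) - gt_prob[arm]
    e_pull_count[eps] = temp_pull_count
    e_estimation[eps] = prob                              # store the learned policy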