def riemannint(function, a, b, n):
    # Left-endpoint Riemann sum with n subintervals of width h.
    sumval = 0
    h = (b - a) / n
    for i in range(n):                      # left endpoints a, a+h, ..., a+(n-1)h
        current_x = a + i * h
        sumval = sumval + function(current_x) * h
    return sumval
def riemannint2(function, a, b, n):
    # Truncated in the gist; a plausible completion as the right-endpoint Riemann sum.
    h = (b - a) / n
    return sum(function(a + (i + 1) * h) for i in range(n)) * h
def trapezeint1(function, a, b, n):
    # Composite trapezoid rule: interior points get weight 2, the endpoints weight 1.
    h = (b - a) / n
    sumval = 0
    for i in range(1, n):                   # interior points a+h, ..., a+(n-1)h
        x = a + i * h
        sumval = sumval + 2 * function(x)
    sumval = h * (sumval + function(a) + function(b)) / 2
    return sumval
def trapezeint2(function, a, b, n):
    # Truncated in the gist; a plausible completion as the same trapezoid rule in one expression.
    h = (b - a) / n
    return h * (sum(function(a + i * h) for i in range(1, n)) + (function(a) + function(b)) / 2)
def simpsonint1(function, a, b, n):
    # Composite Simpson's rule; n must be even so [a, b] splits into m = n/2 panel pairs.
    h = (b - a) / n
    m = n // 2
    sumval = 0
    if n % 2 == 0:
        for i in range(1, m):               # even interior points a+2ih, weight 2
            x = a + 2 * i * h
            sumval = sumval + 2 * function(x)
        for i in range(1, m + 1):           # odd points a+(2i-1)h, weight 4
            x = a + (2 * i - 1) * h
            sumval = sumval + 4 * function(x)
        sumval = h * (sumval + function(a) + function(b)) / 3
        return sumval
    raise ValueError("n must be even for Simpson's rule")
def trapezearea(function, a, b):
    # Area of a single trapezoid over [a, b]; used by adaptint below.
    h = b - a
    area = h * (function(a) + function(b)) / 2
    return area
def adaptint(function, a, b, tol=1e-8):
    # Adaptive trapezoid rule: compare one coarse trapezoid over [a, b] against
    # two halves and refine until the two estimates agree to within tol.
    m = (a + b) / 2
    areatot = trapezearea(function, a, b)
    nextareatot = trapezearea(function, a, m) + trapezearea(function, m, b)
    if abs(nextareatot - areatot) < tol:
        return nextareatot
    # the recursion below is a plausible completion of the truncated original
    return adaptint(function, a, m, tol / 2) + adaptint(function, m, b, tol / 2)
import numpy as np

def montecarlo(function, a, b, n):
    # Monte Carlo integration: average the integrand at n uniform samples in [a, b]
    # and scale by the interval length.
    sumval = 0.0
    the_range = np.random.uniform(a, b, n)
    for i in the_range:
        sumval = sumval + function(float(i))
    return (b - a) / n * sumval
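As a quick usage sketch (not part of the original gist), the integrators above can be checked against the integral of x**2 over [0, 1], which equals 1/3:

f = lambda x: x ** 2
print(riemannint(f, 0, 1, 1000))     # ~0.3328, the left sum underestimates an increasing integrand
print(riemannint2(f, 0, 1, 1000))    # ~0.3338
print(trapezeint1(f, 0, 1, 1000))    # ~0.333333
print(simpsonint1(f, 0, 1, 1000))    # ~0.333333
print(adaptint(f, 0, 1))             # ~0.333333
print(montecarlo(f, 0, 1, 100000))   # ~0.333 plus sampling noise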
# a) vectorized baseline: pull every arm on every step and average its rewards
#    (num_ep, num_bandit, num_iter and gt_prob are experiment constants defined
#     elsewhere in the gist; gt_prob is the true success probability of each arm)
a_expect = np.zeros((num_ep, num_bandit))
for eps in range(num_ep):
    temp_expect = np.zeros(num_bandit)
    temp_choice = np.zeros(num_bandit)
    for it in range(num_iter // 10):
        temp_choice = temp_choice + 1
        current_reward = np.random.uniform(0, 1, num_bandit) < gt_prob   # Bernoulli reward per arm
        temp_expect = temp_expect + current_reward    # plausible completion: accumulate rewards
    a_expect[eps] = temp_expect / temp_choice         # plausible completion: per-arm average reward
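The strategy fragments below all rely on those experiment-wide constants. A minimal setup sketch with assumed values (and a pull helper that is not in the gist), so the sketches that follow have something concrete to run against:

import numpy as np

num_ep     = 200                                     # assumed: number of independent episodes
num_bandit = 10                                      # assumed: number of arms
num_iter   = 1000                                    # assumed: pulls per episode
gt_prob    = np.random.uniform(0, 1, num_bandit)     # assumed: true success probability per arm

def pull(arm):
    # helper introduced here, not in the gist: Bernoulli reward for a single arm
    return float(np.random.uniform() < gt_prob[arm])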
# b) greedy: always pull the arm with the highest current value estimate
b_pull_count = np.zeros((num_ep, num_bandit))     # how often each arm was pulled
b_estimation = np.zeros((num_ep, num_bandit))     # final value estimate per arm
b_reward = np.zeros((num_ep, num_iter))           # reward received at each step
b_optimal_pull = np.zeros((num_ep, num_iter))     # 1 when the truly best arm was chosen
b_regret_total = np.zeros((num_ep, num_iter))     # per-step regret against the best arm
for eps in range(num_ep):
    temp_pull_count = np.zeros(num_bandit)
    temp_estimation = np.random.uniform(0, 1, num_bandit)   # random initial estimates
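The greedy loop body is missing from the fragment above; a sketch of what it could look like under the assumed setup, continuing the episode loop with incremental-mean value updates:

    for it in range(num_iter):
        arm = int(np.argmax(temp_estimation))                 # exploit the current best estimate
        reward = pull(arm)
        temp_pull_count[arm] += 1
        temp_estimation[arm] += (reward - temp_estimation[arm]) / temp_pull_count[arm]
        b_reward[eps, it] = reward
        b_optimal_pull[eps, it] = float(arm == np.argmax(gt_prob))
        b_regret_total[eps, it] = np.max(gt_prob) - gt_prob[arm]
    b_pull_count[eps] = temp_pull_count
    b_estimation[eps] = temp_estimation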
# c) epsilon-greedy: with probability epsilon explore a random arm, otherwise act greedily
c_pull_count = np.zeros((num_ep, num_bandit))
c_estimation = np.zeros((num_ep, num_bandit))
c_reward = np.zeros((num_ep, num_iter))
c_optimal_pull = np.zeros((num_ep, num_iter))
c_regret_total = np.zeros((num_ep, num_iter))
for eps in range(num_ep):
    epsilon = np.random.uniform(0, 1)             # exploration rate drawn once per episode
    temp_pull_count = np.zeros(num_bandit)
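As with the greedy case, the loop body is cut off; a sketch continuing the episode above, reusing the same incremental-mean update but splitting each step into explore and exploit branches:

    temp_estimation = np.random.uniform(0, 1, num_bandit)
    for it in range(num_iter):
        if np.random.uniform() < epsilon:
            arm = int(np.random.randint(num_bandit))          # explore: random arm
        else:
            arm = int(np.argmax(temp_estimation))             # exploit: best current estimate
        reward = pull(arm)
        temp_pull_count[arm] += 1
        temp_estimation[arm] += (reward - temp_estimation[arm]) / temp_pull_count[arm]
        c_reward[eps, it] = reward
        c_optimal_pull[eps, it] = float(arm == np.argmax(gt_prob))
        c_regret_total[eps, it] = np.max(gt_prob) - gt_prob[arm]
    c_pull_count[eps] = temp_pull_count
    c_estimation[eps] = temp_estimation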
# d) decaying epsilon-greedy: start fully exploratory and shrink epsilon over time
d_pull_count = np.zeros((num_ep, num_bandit))
d_estimation = np.zeros((num_ep, num_bandit))
d_reward = np.zeros((num_ep, num_iter))
d_optimal_pull = np.zeros((num_ep, num_iter))
d_regret_total = np.zeros((num_ep, num_iter))
for eps in range(num_ep):
    epsilon = 1.0                                 # begin with pure exploration
    temp_pull_count = np.zeros(num_bandit)
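The decay schedule itself is not shown in the gist; a sketch continuing the episode above with a simple multiplicative decay (the 0.999 factor is an assumption):

    temp_estimation = np.random.uniform(0, 1, num_bandit)
    for it in range(num_iter):
        explore = np.random.uniform() < epsilon
        arm = int(np.random.randint(num_bandit)) if explore else int(np.argmax(temp_estimation))
        reward = pull(arm)
        temp_pull_count[arm] += 1
        temp_estimation[arm] += (reward - temp_estimation[arm]) / temp_pull_count[arm]
        d_reward[eps, it] = reward
        d_optimal_pull[eps, it] = float(arm == np.argmax(gt_prob))
        d_regret_total[eps, it] = np.max(gt_prob) - gt_prob[arm]
        epsilon = epsilon * 0.999                 # assumed schedule: less exploration each step
    d_pull_count[eps] = temp_pull_count
    d_estimation[eps] = temp_estimation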
# e) Linear Reward-Inaction: learn a probability vector over arms instead of value estimates
e_pull_count = np.zeros((num_ep, num_bandit))
e_estimation = np.zeros((num_ep, num_bandit))
e_reward = np.zeros((num_ep, num_iter))
e_optimal_pull = np.zeros((num_ep, num_iter))
e_regret_total = np.zeros((num_ep, num_iter))
for eps in range(num_ep):
    learning_rate = 0.1                           # step size for the probability update
    temp_pull_count = np.zeros(num_bandit)
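Linear Reward-Inaction updates the arm probabilities only on success: the chosen arm gains mass and the others shrink proportionally, while a failure leaves the vector unchanged. A sketch continuing the episode above:

    prob = np.ones(num_bandit) / num_bandit               # start from a uniform policy
    for it in range(num_iter):
        arm = int(np.random.choice(num_bandit, p=prob))   # sample an arm from the current policy
        reward = pull(arm)
        temp_pull_count[arm] += 1
        if reward == 1.0:                                 # reward: shift probability toward the arm
            prob = (1.0 - learning_rate) * prob
            prob[arm] += learning_rate
        # inaction: on failure the probability vector stays as it is
        e_reward[eps, it] = reward
        e_optimal_pull[eps, it] = float(arm == np.argmax(gt_prob))
        e_regret_total[eps, it] = np.max(gt_prob) - gt_prob[arm]
    e_pull_count[eps] = temp_pull_count
    e_estimation[eps] = prob                              # store the learned policy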