Last active
January 14, 2019 09:52
-
-
Save JaeDukSeo/5f1d2ba3a1843766aca70e14264b7429 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# b greedy
# Pure greedy agent: on every step pull the arm with the highest current
# estimate (no exploration), repeated for num_ep independent episodes of
# num_iter steps each.  Per-episode results are stored row-wise in the
# b_* arrays below.
b_pull_count = np.zeros((num_ep, num_bandit))    # pulls per arm, per episode
b_estimation = np.zeros((num_ep, num_bandit))    # final estimate per arm, per episode
b_reward = np.zeros((num_ep, num_iter))          # cumulative reward after each step
b_optimal_pull = np.zeros((num_ep, num_iter))    # 1 where the chosen arm == optimal_choice
b_regret_total = np.zeros((num_ep, num_iter))    # cumulative regret after each step

for eps in range(num_ep):
    temp_pull_count = np.zeros(num_bandit)
    # Each episode starts from random estimates drawn uniformly from [0, 1).
    temp_estimation = np.random.uniform(0, 1, num_bandit)
    temp_reward = np.zeros(num_iter)
    temp_optimal_pull = np.zeros(num_iter)
    temp_regret = np.zeros(num_iter)

    # `step` rather than `iter`, so the builtin iter() is not shadowed.
    for step in range(num_iter):
        # select bandit / get reward / increase count / update estimate
        current_choice = np.argmax(temp_estimation)
        current_reward = 1 if np.random.uniform(0, 1) < gt_prob[current_choice] else 0
        temp_pull_count[current_choice] += 1
        # NOTE(review): the step size uses (count + 1) *after* the increment,
        # which effectively treats the random initial estimate as one prior
        # observation -- confirm this is intended rather than a plain
        # sample average with step size 1 / count.
        step_size = 1 / (temp_pull_count[current_choice] + 1)
        temp_estimation[current_choice] += step_size * (
            current_reward - temp_estimation[current_choice]
        )

        # record per-step reward, optimal-arm indicator and instantaneous regret
        temp_reward[step] = current_reward
        temp_optimal_pull[step] = 1 if current_choice == optimal_choice else 0
        temp_regret[step] = gt_prob[optimal_choice] - gt_prob[current_choice]

    # Turn the per-step values into running totals over the episode.
    temp_reward = np.cumsum(temp_reward)
    temp_regret = np.cumsum(temp_regret)

    b_pull_count[eps, :] = temp_pull_count
    b_estimation[eps, :] = temp_estimation
    b_reward[eps, :] = temp_reward
    b_optimal_pull[eps, :] = temp_optimal_pull
    b_regret_total[eps, :] = temp_regret

print('Ground Truth')
print(gt_prob)
print('Expected ')
# Mean of the final per-arm estimates across all episodes.
print(b_estimation.mean(0))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment