k-armed-bandit.py

import numpy as np

def generate_problem(k):
    # True action values: one mean per arm, drawn from a standard normal
    return np.random.normal(loc=0.0, scale=1, size=k)

def generate_reward(problem, action):
    # Observed reward: unit-variance noise around the chosen arm's true value
    return np.random.normal(loc=problem[action], scale=1)

def k_bandit(problem, k, steps, exploration_rate):
    Q = {i: 0 for i in range(k)}  # 1. Value function
    N = {i: 0 for i in range(k)}  # 2. Number of actions, for update rule
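
The excerpt stops right after the two tables are initialized. Here is one way the rest of k_bandit could look: a minimal sketch assuming epsilon-greedy action selection and the incremental sample-average update Q(a) <- Q(a) + (R - Q(a)) / N(a). It reuses generate_reward from the excerpt above; everything inside the loop is an assumption, not the post's original code.

def k_bandit(problem, k, steps, exploration_rate):
    Q = {i: 0 for i in range(k)}  # 1. Value function
    N = {i: 0 for i in range(k)}  # 2. Number of actions, for update rule
    for _ in range(steps):
        # 3. Explore with probability epsilon, otherwise take the greedy arm
        if np.random.random() < exploration_rate:
            action = np.random.randint(k)
        else:
            action = max(Q, key=Q.get)
        reward = generate_reward(problem, action)
        # 4. Sample-average update: Q(a) += (R - Q(a)) / N(a)
        N[action] += 1
        Q[action] += (reward - Q[action]) / N[action]
    return Q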
VideoExtension.swift

extension MapViewController: DJIVideoFeedListener {
    func videoFeed(_ videoFeed: DJIVideoFeed, didUpdateVideoData videoData: Data) {
        var video = videoData
        video.withUnsafeMutableBytes { (pointer: UnsafeMutablePointer<UInt8>) in
            // Forward the raw feed bytes to DJI's video decoder
            VideoPreviewer.instance().push(pointer, length: Int32(videoData.count))
        }
    }
}
VideoPreview.swift

VideoPreviewer.instance().setView(videoPreview)  // render decoded frames into the preview view
DJISDKManager.videoFeeder()?.primaryVideoFeed.add(self, with: nil)  // listen to the primary video feed
VideoPreviewer.instance().start()  // start the decoder
Waypoints.swift

@objc private func initializeMission() {
    // 1.
    if mission.points.count < 2 {
        print("Not enough waypoints")
        return
    }
    // 2.
    if let mission = djiMission {
DJISDKManagerDelegateExtension.swift

extension MapViewController: DJISDKManagerDelegate {
    func appRegisteredWithError(_ error: Error?) {
        if let error = error {
            print(error.localizedDescription)
        } else {
            // Registration succeeded, so connect to the aircraft
            DJISDKManager.startConnectionToProduct()
        }
    }
}
Register.swift

DJISDKManager.registerApp(with: self)  // triggers appRegisteredWithError(_:) above
double-q-learning.py

def double_Q_learning(env, episodes=100, step_size=0.01, exploration_rate=0.01):
    policy = utils.create_random_policy(env)  # Create policy, just for the util function to create Q
    # 1. Initialize value dictionaries, formatted: { S1: { A1: 0.0, A2: 0.0, ... }, ... }
    Q_1 = create_state_action_dictionary(env, policy)
    Q_2 = create_state_action_dictionary(env, policy)
    # 2. Loop through the number of episodes
    for episode in range(episodes):
        env.reset()  # Gym environment reset
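
The excerpt ends as the episode loop begins. The sketch below is a minimal, self-contained version of how the loop could continue, assuming the classic Gym step API (next state, reward, done, info), discrete state and action spaces, plain dicts in place of create_state_action_dictionary, and a discount factor the original signature does not expose. The coin flip is the heart of double Q-learning: one table picks the greedy action and the other evaluates it, which counters the maximization bias of ordinary Q-learning.

import random

def double_q_learning_sketch(env, episodes=100, step_size=0.01,
                             exploration_rate=0.01, discount=0.99):
    n_actions = env.action_space.n
    Q_1 = {s: {a: 0.0 for a in range(n_actions)} for s in range(env.observation_space.n)}
    Q_2 = {s: {a: 0.0 for a in range(n_actions)} for s in range(env.observation_space.n)}
    for _ in range(episodes):
        S = env.reset()
        finished = False
        while not finished:
            # Behave epsilon-greedily with respect to the sum of both tables
            if random.random() < exploration_rate:
                A = env.action_space.sample()
            else:
                A = max(range(n_actions), key=lambda a: Q_1[S][a] + Q_2[S][a])
            S_next, reward, finished, _ = env.step(A)
            # Flip a coin: update one table, using the other to evaluate
            # the greedy action it selected
            if random.random() < 0.5:
                best = max(Q_1[S_next], key=Q_1[S_next].get)
                Q_1[S][A] += step_size * (reward + discount * Q_2[S_next][best] - Q_1[S][A])
            else:
                best = max(Q_2[S_next], key=Q_2[S_next].get)
                Q_2[S][A] += step_size * (reward + discount * Q_1[S_next][best] - Q_2[S][A])
            S = S_next
    return Q_1, Q_2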
q-learning.py

def Q_learning(env, episodes=100, step_size=0.01, exploration_rate=0.01):
    policy = utils.create_random_policy(env)  # Create policy, just for the util function to create Q
    Q = create_state_action_dictionary(env, policy)  # 1. Initialize value dictionary, formatted: { S1: { A1: 0.0, A2: 0.0, ... }, ... }
    # 2. Loop through the number of episodes
    for episode in range(episodes):
        env.reset()  # Gym environment reset
        S = env.env.s  # 3. Getting the state
        finished = False
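
Under the same assumptions as before (classic Gym step API, discrete spaces, plain dict tables, an assumed discount factor), here is a self-contained sketch of the complete algorithm. The update bootstraps from the greedy value of the next state, regardless of which action the behavior policy takes next; that is what makes Q-learning off-policy.

import random

def q_learning_sketch(env, episodes=100, step_size=0.01,
                      exploration_rate=0.01, discount=0.99):
    n_actions = env.action_space.n
    Q = {s: {a: 0.0 for a in range(n_actions)} for s in range(env.observation_space.n)}
    for _ in range(episodes):
        S = env.reset()
        finished = False
        while not finished:
            # epsilon-greedy behavior policy
            if random.random() < exploration_rate:
                A = env.action_space.sample()
            else:
                A = max(Q[S], key=Q[S].get)
            S_next, reward, finished, _ = env.step(A)
            # Off-policy target: greedy value of the next state
            Q[S][A] += step_size * (reward + discount * max(Q[S_next].values()) - Q[S][A])
            S = S_next
    return Q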
sarsa.py

def sarsa(env, episodes=100, step_size=0.01, exploration_rate=0.01):
    policy = utils.create_random_policy(env)  # Create policy, just for the util function to create Q
    Q = create_state_action_dictionary(env, policy)  # 1. Initialize value dictionary, formatted: { S1: { A1: 0.0, A2: 0.0, ... }, ... }
    # 2. Loop through the number of episodes
    for episode in range(episodes):
        env.reset()  # Gym environment reset
        S = env.env.s  # 3. Getting the state
        A = greedy_policy(Q)[S]  # 4. Deciding on the first action
        finished = False
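
A self-contained SARSA sketch under the same assumptions; the epsilon_greedy helper here is a stand-in for the post's greedy_policy utility. The difference from Q-learning sits in a single line: the target uses the value of the action the policy will actually take next, which makes the update on-policy.

import random

def epsilon_greedy(Q, S, env, exploration_rate):
    # Explore with probability epsilon, otherwise act greedily
    if random.random() < exploration_rate:
        return env.action_space.sample()
    return max(Q[S], key=Q[S].get)

def sarsa_sketch(env, episodes=100, step_size=0.01,
                 exploration_rate=0.01, discount=0.99):
    n_actions = env.action_space.n
    Q = {s: {a: 0.0 for a in range(n_actions)} for s in range(env.observation_space.n)}
    for _ in range(episodes):
        S = env.reset()
        A = epsilon_greedy(Q, S, env, exploration_rate)  # first action chosen up front
        finished = False
        while not finished:
            S_next, reward, finished, _ = env.step(A)
            # On-policy target: the value of the action actually taken next
            A_next = epsilon_greedy(Q, S_next, env, exploration_rate)
            Q[S][A] += step_size * (reward + discount * Q[S_next][A_next] - Q[S][A])
            S, A = S_next, A_next
    return Q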
policy_iterator.py

import tqdm

def policy_iterator(env, n, t, epsilon=0.01):
    random_policy = create_random_policy(env)
    random_policy_score = test_policy(random_policy, env)
    best_policy = (random_policy, random_policy_score)  # (policy, score) pair to beat
    for i in tqdm.tqdm(range(t)):
        # Refine the current best policy with n more Monte Carlo episodes
        new_policy = monte_carlo_e_soft(env, policy=best_policy[0], episodes=n, epsilon=epsilon)
        new_policy_score = test_policy(new_policy, env)
        if new_policy_score > best_policy[1]:
            best_policy = (new_policy, new_policy_score)
    return best_policy
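
A hypothetical call site, assuming an old-style Gym FrozenLake environment, that create_random_policy, test_policy, and monte_carlo_e_soft are in scope, and that test_policy reports the policy's average success rate; the environment id and hyperparameters are illustrative only.

import gym

env = gym.make('FrozenLake8x8-v0')
policy, score = policy_iterator(env, n=1000, t=50, epsilon=0.01)
print(f'best policy wins {score:.0%} of evaluation episodes')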