Jeremi Kaczmarczyk (jknthn), GitHub gists

import numpy as np

def generate_problem(k):
    # True value of each of the k arms, drawn from a standard normal
    return np.random.normal(loc=0.0, scale=1.0, size=k)

def generate_reward(problem, action):
    # Observed reward: a noisy sample around the chosen arm's true value
    return np.random.normal(loc=problem[action], scale=1.0)

def k_bandit(problem, k, steps, exploration_rate):
    Q = {i: 0 for i in range(k)}  # 1. Value function
    N = {i: 0 for i in range(k)}  # 2. Number of actions, for update rule
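    # The gist preview cuts off here; a hedged sketch of the rest of the loop,
    # assuming epsilon-greedy action selection and the incremental
    # sample-average update rule (the body below is illustrative, not from the gist):
    for _ in range(steps):
        if np.random.random() < exploration_rate:
            action = np.random.randint(k)   # explore: pick a random arm
        else:
            action = max(Q, key=Q.get)      # exploit: pick the best-known arm
        reward = generate_reward(problem, action)
        N[action] += 1
        # Incremental mean: Q(a) <- Q(a) + (R - Q(a)) / N(a)
        Q[action] += (reward - Q[action]) / N[action]
    return Q
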
extension MapViewController: DJIVideoFeedListener {

    func videoFeed(_ videoFeed: DJIVideoFeed, didUpdateVideoData videoData: Data) {
        // Push each raw video frame into the DJI previewer for decoding
        let data = NSData(data: videoData)
        var video = videoData
        video.withUnsafeMutableBytes { (pointer: UnsafeMutablePointer<UInt8>) in
            VideoPreviewer.instance().push(pointer, length: Int32(data.length))
        }
    }
}

// Bind the previewer to the on-screen view, subscribe to the primary
// video feed, and start decoding
VideoPreviewer.instance().setView(videoPreview)
DJISDKManager.videoFeeder()?.primaryVideoFeed.add(self, with: nil)
VideoPreviewer.instance().start()

@objc private func initializeMission() {
    // 1. A waypoint mission needs at least two waypoints
    if mission.points.count < 2 {
        print("Not enough waypoints")
        return
    }
    // 2. Unwrap the DJI mission if it has already been created
    if let mission = djiMission {
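        // The preview is truncated here; a hedged sketch of how the unwrapped
        // mission might be handed to the DJI waypoint mission operator
        // (the error handling is illustrative, not from the gist):
        if let error = DJISDKManager.missionControl()?.waypointMissionOperator().load(mission) {
            print(error.localizedDescription)
            return
        }
    }
}
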
extension MapViewController: DJISDKManagerDelegate {

    func appRegisteredWithError(_ error: Error?) {
        if let error = error {
            print(error.localizedDescription)
        } else {
            // Registration succeeded, so open the connection to the aircraft
            DJISDKManager.startConnectionToProduct()
        }
    }
}

// Called separately (e.g. in viewDidLoad) to start SDK registration
DJISDKManager.registerApp(with: self)

def double_Q_learning(env, episodes=100, step_size=0.01, exploration_rate=0.01):
    policy = utils.create_random_policy(env)  # Create policy, just for the util function to create Q
    # 1. Initialize value dictionaries formatted: { S1: { A1: 0.0, A2: 0.0, ... }, ... }
    Q_1 = create_state_action_dictionary(env, policy)
    Q_2 = create_state_action_dictionary(env, policy)
    # 2. Loop through the number of episodes
    for episode in range(episodes):
        env.reset()  # Gym environment reset
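        # The preview is truncated here; a hedged sketch of the double Q-learning
        # step: act epsilon-greedily on Q_1 + Q_2, then flip a coin to decide which
        # table to update, evaluating one table's greedy action with the other.
        # An undiscounted target is assumed (no discount factor appears in the
        # signature above); requires `import random` at the top of the file.
        S = env.env.s
        finished = False
        while not finished:
            combined = {a: Q_1[S][a] + Q_2[S][a] for a in Q_1[S]}
            if random.random() < exploration_rate:
                A = random.choice(list(combined.keys()))
            else:
                A = max(combined, key=combined.get)
            S_prime, reward, finished, _ = env.step(A)
            if random.random() < 0.5:
                best = max(Q_1[S_prime], key=Q_1[S_prime].get)
                Q_1[S][A] += step_size * (reward + Q_2[S_prime][best] - Q_1[S][A])
            else:
                best = max(Q_2[S_prime], key=Q_2[S_prime].get)
                Q_2[S][A] += step_size * (reward + Q_1[S_prime][best] - Q_2[S][A])
            S = S_prime
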
def Q_learning(env, episodes=100, step_size=0.01, exploration_rate=0.01):
    policy = utils.create_random_policy(env)  # Create policy, just for the util function to create Q
    Q = create_state_action_dictionary(env, policy)  # 1. Initialize value dictionary formatted: { S1: { A1: 0.0, A2: 0.0, ... }, ... }
    # 2. Loop through the number of episodes
    for episode in range(episodes):
        env.reset()  # Gym environment reset
        S = env.env.s  # 3. Getting the current state
        finished = False
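        # The preview ends here; a hedged sketch of the Q-learning inner loop,
        # assuming epsilon-greedy behavior and an undiscounted TD target
        # (requires `import random` at the top of the file):
        while not finished:
            if random.random() < exploration_rate:
                A = random.choice(list(Q[S].keys()))   # explore
            else:
                A = max(Q[S], key=Q[S].get)            # exploit
            S_prime, reward, finished, _ = env.step(A)
            # Off-policy target: value of the greedy action in the next state
            best_next = max(Q[S_prime], key=Q[S_prime].get)
            Q[S][A] += step_size * (reward + Q[S_prime][best_next] - Q[S][A])
            S = S_prime
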
def sarsa(env, episodes=100, step_size=0.01, exploration_rate=0.01):
    policy = utils.create_random_policy(env)  # Create policy, just for the util function to create Q
    Q = create_state_action_dictionary(env, policy)  # 1. Initialize value dictionary formatted: { S1: { A1: 0.0, A2: 0.0, ... }, ... }
    # 2. Loop through the number of episodes
    for episode in range(episodes):
        env.reset()  # Gym environment reset
        S = env.env.s  # 3. Getting the current state
        A = greedy_policy(Q)[S]  # 4. Deciding on the first action
        finished = False
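        # The preview ends here; a hedged sketch of the SARSA inner loop, reusing
        # greedy_policy from the visible code above (a full version would fold
        # exploration_rate into an epsilon-greedy choice); undiscounted target assumed:
        while not finished:
            S_prime, reward, finished, _ = env.step(A)
            A_prime = greedy_policy(Q)[S_prime]  # on-policy: the action we evaluate is the one we take
            Q[S][A] += step_size * (reward + Q[S_prime][A_prime] - Q[S][A])
            S, A = S_prime, A_prime
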
import tqdm

def policy_iterator(env, n, t, epsilon=0.01):
    random_policy = create_random_policy(env)
    random_policy_score = test_policy(random_policy, env)
    best_policy = (random_policy, random_policy_score)
    # Run t rounds of Monte Carlo e-soft improvement, keeping the best policy so far
    for i in tqdm.tqdm(range(t)):
        new_policy = monte_carlo_e_soft(env, policy=best_policy[0], episodes=n, epsilon=epsilon)
        new_policy_score = test_policy(new_policy, env)
        if new_policy_score > best_policy[1]:
            best_policy = (new_policy, new_policy_score)
    return best_policy
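
A hypothetical usage sketch, assuming an OpenAI Gym environment and that the gist's helpers (create_random_policy, test_policy, monte_carlo_e_soft) are importable; the environment name and hyperparameters below are illustrative:

import gym

env = gym.make('FrozenLake8x8-v0')
policy, score = policy_iterator(env, n=1000, t=50, epsilon=0.01)
print(score)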