Skip to content

Instantly share code, notes, and snippets.

Jeremi Kaczmarczyk jknthn

Block or report user

Report or block jknthn

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View k-armed-bandit.py
def generate_problem(k):
    """Create a k-armed bandit problem.

    Returns an array of k true action values q*(a), each drawn i.i.d. from a
    standard normal distribution (mean 0.0, std 1.0), as in the classic
    k-armed testbed.

    Bug fix: the original hard-coded size=10, silently ignoring the `k`
    parameter; the array length now follows `k`.
    """
    return np.random.normal(loc=0.0, scale=1.0, size=k)
def generate_reward(problem, action):
return np.random.normal(loc=problem[action], scale=1)
# Epsilon-greedy loop for a single k-armed bandit problem.
# NOTE(review): this is a truncated gist preview — only the table
# initialisation is visible; the action-selection/update loop over
# `steps` is cut off below.
def k_bandit(problem, k, steps, exploration_rate):
Q = {i: 0 for i in range(k)} # 1. Value function
N = {i: 0 for i in range(k)} # 2. Number of actions, for update rule
View VideoExtension.swift
// MARK: - DJIVideoFeedListener
// Forwards raw video-feed bytes from the DJI SDK into VideoPreviewer.
extension MapViewController: DJIVideoFeedListener {
// Called by the SDK each time a chunk of video data arrives on the feed.
func videoFeed(_ videoFeed: DJIVideoFeed, didUpdateVideoData videoData: Data) {
// NSData bridge is used only to read the byte count passed to push(_:length:).
let data = NSData(data: videoData)
// Mutable copy so withUnsafeMutableBytes can hand out a mutable pointer.
var video = videoData
// NOTE(review): the typed-pointer withUnsafeMutableBytes overload is
// deprecated in Swift 5 — consider the UnsafeMutableRawBufferPointer
// overload; confirm the pointer type VideoPreviewer.push expects.
video.withUnsafeMutableBytes { (pointer: UnsafeMutablePointer<UInt8>) in
VideoPreviewer.instance().push(pointer, length: Int32(data.length))
}
}
}
View VideoPreview.swift
// Fragment: wires up the live-video pipeline — render into `videoPreview`,
// subscribe to the primary video feed, then start the previewer.
// NOTE(review): these statements belong inside a setup method of the view
// controller; the enclosing method is not shown in this gist preview.
VideoPreviewer.instance().setView(videoPreview)
DJISDKManager.videoFeeder()?.primaryVideoFeed.add(self, with: nil)
VideoPreviewer.instance().start()
View Waypoints.swift
// Validates and builds the DJI waypoint mission.
@objc private func initializeMission() {
// 1.
// A waypoint mission needs at least two points; bail out early otherwise.
if mission.points.count < 2 {
print("Not enough waypoints")
return
}
// 2.
if let mission = djiMission {
// NOTE(review): gist preview is truncated here — the body of this branch
// and the function's closing braces are not shown.
View DJISDKManagerDelegateExtension.swift
extension MapViewController: DJISDKManagerDelegate {
// DJI SDK registration callback: log a failed registration, otherwise
// begin connecting to the aircraft/product.
func appRegisteredWithError(_ error: Error?) {
guard let registrationError = error else {
DJISDKManager.startConnectionToProduct()
return
}
print(registrationError.localizedDescription)
}
View Register.swift
// Fragment: registers the app with the DJI SDK; the outcome is delivered to
// the DJISDKManagerDelegate via appRegisteredWithError(_:) (see the
// delegate extension elsewhere in this listing).
DJISDKManager.registerApp(with: self)
View double-q-learning.py
# Double Q-learning: maintains two state-action value tables (Q_1, Q_2).
# NOTE(review): truncated gist preview — the per-step action selection and
# the double-Q update rule are cut off below.
def double_Q_learning(env, episodes=100, step_size=0.01, exploration_rate=0.01):
policy = utils.create_random_policy(env) # Create policy, just for the util function to create Q
# 1. Initialize value dictionaries formatted: { S1: { A1: 0.0, A2: 0.0, ...}, ...}
Q_1 = create_state_action_dictionary(env, policy)
Q_2 = create_state_action_dictionary(env, policy)
# 2. Loop through the number of episodes
for episode in range(episodes):
env.reset() # Gym environment reset
View q-learning.py
# Q-learning over a Gym-style environment with a single value table Q.
# NOTE(review): truncated gist preview — the action-selection and Q-update
# loop is cut off below.
def Q_learning(env, episodes=100, step_size=0.01, exploration_rate=0.01):
policy = utils.create_random_policy(env) # Create policy, just for the util function to create Q
Q = create_state_action_dictionary(env, policy) # 1. Initialize value dictionary formatted: { S1: { A1: 0.0, A2: 0.0, ...}, ...}
# 2. Loop through the number of episodes
for episode in range(episodes):
env.reset() # Gym environment reset
S = env.env.s # 3. Getting State
finished = False
View sarsa.py
# SARSA: like Q-learning above, but the first action A is chosen up front
# (on-policy update uses the actually-taken next action).
# NOTE(review): truncated gist preview — the step/update loop is cut off below.
def sarsa(env, episodes=100, step_size=0.01, exploration_rate=0.01):
policy = utils.create_random_policy(env) # Create policy, just for the util function to create Q
Q = create_state_action_dictionary(env, policy) # 1. Initialize value dictionary formatted: { S1: { A1: 0.0, A2: 0.0, ...}, ...}
# 2. Loop through the number of episodes
for episode in range(episodes):
env.reset() # Gym environment reset
S = env.env.s # 3. Getting State
A = greedy_policy(Q)[S] # 4. Deciding on first action
finished = False
View policy_iterator.py
# Repeatedly runs Monte Carlo e-soft improvement, keeping the best-scoring
# (policy, score) pair seen so far.
# Params: n = episodes per monte_carlo_e_soft run, t = number of
# improvement iterations, epsilon = exploration parameter passed through.
def policy_iterator(env, n, t, epsilon=0.01):
random_policy = create_random_policy(env)
random_policy_score = test_policy(random_policy, env)
best_policy = (random_policy, random_policy_score)
for i in tqdm.tqdm(range(t)):
new_policy = monte_carlo_e_soft(env, policy=best_policy[0], episodes=n, epsilon=epsilon)
new_policy_score = test_policy(new_policy, env)
if new_policy_score > best_policy[1]:
best_policy = (new_policy, new_policy_score)
# NOTE(review): no return statement visible — likely `return best_policy`
# was cut off by the gist preview; confirm against the full source.
You can’t perform that action at this time.