Jeremi Kaczmarczyk (jknthn) - GitHub Gists

import numpy as np

def generate_problem(k):
    return np.random.normal(loc=0.0, scale=1, size=k)  # true action values q*(a), one per arm

def generate_reward(problem, action):
    return np.random.normal(loc=problem[action], scale=1)  # noisy reward around q*(action)

def k_bandit(problem, k, steps, exploration_rate):
    Q = {i: 0 for i in range(k)}  # 1. Action-value estimates Q(a)
    N = {i: 0 for i in range(k)}  # 2. Number of times each action was taken, for the update rule
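    # -- The gist preview ends above; a possible continuation, assuming
    # epsilon-greedy action selection and the incremental sample-average
    # update (generate_reward is the helper defined earlier). --
    for _ in range(steps):
        if np.random.random() < exploration_rate:
            action = np.random.randint(k)  # explore a random arm
        else:
            action = max(Q, key=Q.get)  # exploit the current best arm
        reward = generate_reward(problem, action)
        N[action] += 1
        # incremental sample-average: Q(a) <- Q(a) + (R - Q(a)) / N(a)
        Q[action] += (reward - Q[action]) / N[action]
    return Q
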
def iterative_policy_evaluation(policy, theta=0.01, discount_rate=0.5):
    V_s = {i: 0 for i in range(16)}  # 1. Value table for the 16 states
    probability_map = create_probability_map()  # 2. Transition dynamics
    delta = 100  # 3. Start above theta so the loop runs at least once
    while not delta < theta:  # 4. Sweep until the largest update drops below theta
        delta = 0  # 5. Reset the per-sweep maximum change
        for state in range(16):  # 6. Full sweep over the state space
            v = V_s[state]  # 7. Remember the old value
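            # -- Preview ends above; a possible rest of the sweep. It assumes
            # a 4x4 GridWorld (16 states, 4 actions), a policy indexed as
            # policy[state][action], and a probability_map keyed by
            # (state, action) returning (probability, next_state, reward);
            # that layout is a guess, not the author's definition. --
            # Bellman expectation backup:
            #   V(s) = sum_a pi(a|s) * p(s'|s,a) * (r + gamma * V(s'))
            total = 0
            for action in range(4):
                probability, next_state, reward = probability_map[(state, action)]
                total += policy[state][action] * probability * (reward + discount_rate * V_s[next_state])
            V_s[state] = total
            delta = max(delta, abs(v - V_s[state]))  # track the largest change
    return V_s
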
extension MapViewController: DJIVideoFeedListener {

    func videoFeed(_ videoFeed: DJIVideoFeed, didUpdateVideoData videoData: Data) {
        // Push each raw video frame into the DJI video previewer; the length
        // is captured first so the buffer is not read inside its own
        // mutable-bytes closure
        let length = Int32(videoData.count)
        var video = videoData
        video.withUnsafeMutableBytes { (pointer: UnsafeMutablePointer<UInt8>) in
            VideoPreviewer.instance().push(pointer, length: length)
        }
    }
}

// Attach the preview view to the DJI video pipeline and start decoding
VideoPreviewer.instance().setView(videoPreview)
DJISDKManager.videoFeeder()?.primaryVideoFeed.add(self, with: nil)
VideoPreviewer.instance().start()

@objc private func initializeMission() {
    // 1. A waypoint mission needs at least two waypoints
    if mission.points.count < 2 {
        print("Not enough waypoints")
        return
    }
    // 2. Proceed only if the DJI mission object was created
    if let mission = djiMission {

extension MapViewController: DJISDKManagerDelegate {

    func appRegisteredWithError(_ error: Error?) {
        if let error = error {
            print(error.localizedDescription)
        } else {
            // Registration succeeded, so connect to the aircraft
            DJISDKManager.startConnectionToProduct()
        }
    }
}

// Called elsewhere (e.g. from viewDidLoad) to kick off SDK registration
DJISDKManager.registerApp(with: self)

def double_Q_learning(env, episodes=100, step_size=0.01, exploration_rate=0.01):
    policy = utils.create_random_policy(env)  # Create policy, just for the util function to create Q
    # 1. Initialize two value dictionaries formatted: { S1: { A1: 0.0, A2: 0.0, ... }, ... }
    Q_1 = create_state_action_dictionary(env, policy)
    Q_2 = create_state_action_dictionary(env, policy)
    # 2. Loop through the number of episodes
    for episode in range(episodes):
        env.reset()  # Gym environment reset
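        # -- The gist preview ends above; a possible continuation under the
        # usual double Q-learning scheme (van Hasselt, 2010): act epsilon-
        # greedily on Q_1 + Q_2, then flip a coin and update one table using
        # the other's estimate of the greedy action. The discount value and
        # the old Gym step API (state, reward, done, info) are assumptions,
        # not the author's code. --
        discount = 0.99
        S = env.env.s  # Getting state, as in the author's other snippets
        finished = False
        while not finished:
            totals = {a: Q_1[S][a] + Q_2[S][a] for a in Q_1[S]}
            if np.random.random() < exploration_rate:
                A = np.random.choice(list(totals.keys()))  # explore
            else:
                A = max(totals, key=totals.get)  # exploit the combined estimates
            S_next, reward, finished, _ = env.step(A)
            # coin flip: update one table with the other's evaluation
            if np.random.random() < 0.5:
                best = max(Q_1[S_next], key=Q_1[S_next].get)
                Q_1[S][A] += step_size * (reward + discount * Q_2[S_next][best] - Q_1[S][A])
            else:
                best = max(Q_2[S_next], key=Q_2[S_next].get)
                Q_2[S][A] += step_size * (reward + discount * Q_1[S_next][best] - Q_2[S][A])
            S = S_next
    return Q_1, Q_2
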
def Q_learning(env, episodes=100, step_size=0.01, exploration_rate=0.01):
    policy = utils.create_random_policy(env)  # Create policy, just for the util function to create Q
    Q = create_state_action_dictionary(env, policy)  # 1. Initialize value dictionary formatted: { S1: { A1: 0.0, A2: 0.0, ... }, ... }
    # 2. Loop through the number of episodes
    for episode in range(episodes):
        env.reset()  # Gym environment reset
        S = env.env.s  # 3. Get the starting state
        finished = False
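        # -- Preview ends above; a possible continuation with the standard
        # Q-learning (off-policy TD) update:
        #   Q(S,A) <- Q(S,A) + step_size * (R + discount * max_a Q(S',a) - Q(S,A))
        # Epsilon-greedy behaviour, the discount value, and the old Gym step
        # API (state, reward, done, info) are assumptions; greedy_policy is
        # the helper from the author's sarsa snippet. --
        discount = 0.99
        while not finished:
            if np.random.random() < exploration_rate:
                A = np.random.choice(list(Q[S].keys()))  # explore
            else:
                A = greedy_policy(Q)[S]  # exploit current estimates
            S_next, reward, finished, _ = env.step(A)
            Q[S][A] += step_size * (reward + discount * max(Q[S_next].values()) - Q[S][A])
            S = S_next
    return Q
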
def sarsa(env, episodes=100, step_size=0.01, exploration_rate=0.01):
    policy = utils.create_random_policy(env)  # Create policy, just for the util function to create Q
    Q = create_state_action_dictionary(env, policy)  # 1. Initialize value dictionary formatted: { S1: { A1: 0.0, A2: 0.0, ... }, ... }
    # 2. Loop through the number of episodes
    for episode in range(episodes):
        env.reset()  # Gym environment reset
        S = env.env.s  # 3. Get the starting state
        A = greedy_policy(Q)[S]  # 4. Decide on the first action
        finished = False
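        # -- Preview ends above; a possible continuation with the on-policy
        # SARSA update, which bootstraps from the action actually taken next:
        #   Q(S,A) <- Q(S,A) + step_size * (R + discount * Q(S',A') - Q(S,A))
        # Epsilon-greedy selection, the discount value, and the old Gym step
        # API (state, reward, done, info) are assumptions. --
        discount = 0.99
        while not finished:
            S_next, reward, finished, _ = env.step(A)
            if np.random.random() < exploration_rate:
                A_next = np.random.choice(list(Q[S_next].keys()))  # explore
            else:
                A_next = greedy_policy(Q)[S_next]  # exploit
            Q[S][A] += step_size * (reward + discount * Q[S_next][A_next] - Q[S][A])
            S, A = S_next, A_next
    return Q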