Skip to content

Instantly share code, notes, and snippets.

@maxpagels
Created September 19, 2018 06:28
Show Gist options
  • Save maxpagels/87d608ffaf846aa54804ff6babf95da2 to your computer and use it in GitHub Desktop.
parallel-evolution-strategies.py
import numpy as np
import multiprocessing
from joblib import Parallel, delayed
np.random.seed(0)
# the function we want to optimize
def f(w, target=None):
    """Return the reward for a candidate parameter vector.

    The reward is the negative squared Euclidean distance between ``w``
    and the target vector, so the maximum reward (0.0) is attained exactly
    at the target.

    Args:
        w: candidate parameter vector (array-like, broadcastable with target).
        target: vector to match. Defaults to the module-level ``solution``
            global, which preserves the original call signature ``f(w)``.

    Returns:
        float: ``-sum((target - w) ** 2)``.
    """
    if target is None:
        target = solution  # module-level global defined later in the script
    return -np.sum(np.square(target - w))
# hyperparameters for the evolution-strategies optimizer
pop_size = 1000 # population size: number of perturbations sampled per iteration
std = 1 # noise standard deviation (scale of the parameter jitter)
alpha = 0.001 # learning rate for the parameter update
# start the optimization
solution = np.array([0.5, 0.1, -0.3])
w = np.random.randn(3) # our initial guess is random
def run_episode(w, jittered_w):
    """Score one perturbed candidate: evaluate f at w shifted by jittered_w."""
    candidate = w + jittered_w
    return f(candidate)
# Evolution-strategies optimization loop: sample a population of Gaussian
# parameter perturbations, score each one in parallel, then move w along the
# reward-weighted average of the noise directions.
n_jobs = multiprocessing.cpu_count()  # hoisted: invariant across iterations
for i in range(1000):
    # Periodically report fitness of the current (unperturbed) parameters.
    if i % 20 == 0:
        print('iter %d. w: %s, solution: %s, reward: %f' %
              (i, str(w), str(solution), f(w)))
    N = np.random.randn(pop_size, 3)  # perturbation directions ~ N(0, 1)
    # Evaluate each jittered candidate w + std * N[j] in parallel across cores.
    # (The original also pre-allocated R = np.zeros(pop_size) here, which was
    # dead code — it was immediately overwritten by this assignment.)
    R = np.array(Parallel(n_jobs=n_jobs)(
        delayed(run_episode)(w, std * N[j]) for j in range(pop_size)))
    # Standardize rewards so the update is invariant to reward scale/offset.
    A = (R - np.mean(R)) / np.std(R)
    # ES gradient estimate: reward-weighted sum of the noise directions.
    w = w + alpha / (pop_size * std) * np.dot(N.T, A)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment