@gandalfsaxe
Created February 7, 2018 15:49
.bib file example
Automatically generated by Mendeley Desktop 1.17.13
Any changes to this file will be lost if it is regenerated by Mendeley.
BibTeX export options can be customized via Preferences -> BibTeX in Mendeley Desktop
@article{Such2017,
abstract = {Deep artificial neural networks (DNNs) are typically trained via gradient-based learning algorithms, namely backpropagation. Evolution strategies (ES) can rival backprop-based algorithms such as Q-learning and policy gradients on challenging deep reinforcement learning (RL) problems. However, ES can be considered a gradient-based algorithm because it performs stochastic gradient descent via an operation similar to a finite-difference approximation of the gradient. That raises the question of whether non-gradient-based evolutionary algorithms can work at DNN scales. Here we demonstrate they can: we evolve the weights of a DNN with a simple, gradient-free, population-based genetic algorithm (GA) and it performs well on hard deep RL problems, including Atari and humanoid locomotion. The Deep GA successfully evolves networks with over four million free parameters, the largest neural networks ever evolved with a traditional evolutionary algorithm. These results (1) expand our sense of the scale at which GAs can operate, (2) suggest intriguingly that in some cases following the gradient is not the best choice for optimizing performance, and (3) make immediately available the multitude of techniques that have been developed in the neuroevolution community to improve performance on RL problems. To demonstrate the latter, we show that combining DNNs with novelty search, which was designed to encourage exploration on tasks with deceptive or sparse reward functions, can solve a high-dimensional problem on which reward-maximizing algorithms (e.g. DQN, A3C, ES, and the GA) fail. Additionally, the Deep GA parallelizes better than ES, A3C, and DQN, and enables a state-of-the-art compact encoding technique that can represent million-parameter DNNs in thousands of bytes.},
archivePrefix = {arXiv},
arxivId = {1712.06567},
author = {Such, Felipe Petroski and Madhavan, Vashisht and Conti, Edoardo and Lehman, Joel and Stanley, Kenneth O. and Clune, Jeff},
eprint = {1712.06567},
file = {:Users/gandalf/Library/Mobile Documents/com{\~{}}apple{\~{}}CloudDocs/-Sync/Mendeley/Such et al/Such et al. - 2017 - Deep Neuroevolution Genetic Algorithms Are a Competitive Alternative for Training Deep Neural Networks for Reinforc.pdf:pdf},
title = {{Deep Neuroevolution: Genetic Algorithms Are a Competitive Alternative for Training Deep Neural Networks for Reinforcement Learning}},
url = {http://arxiv.org/abs/1712.06567},
year = {2017}
}
@article{Salimans2017,
abstract = {We explore the use of Evolution Strategies (ES), a class of black box optimization algorithms, as an alternative to popular MDP-based RL techniques such as Q-learning and Policy Gradients. Experiments on MuJoCo and Atari show that ES is a viable solution strategy that scales extremely well with the number of CPUs available: By using a novel communication strategy based on common random numbers, our ES implementation only needs to communicate scalars, making it possible to scale to over a thousand parallel workers. This allows us to solve 3D humanoid walking in 10 minutes and obtain competitive results on most Atari games after one hour of training. In addition, we highlight several advantages of ES as a black box optimization technique: it is invariant to action frequency and delayed rewards, tolerant of extremely long horizons, and does not need temporal discounting or value function approximation.},
archivePrefix = {arXiv},
arxivId = {1703.03864},
author = {Salimans, Tim and Ho, Jonathan and Chen, Xi and Sidor, Szymon and Sutskever, Ilya},
eprint = {1703.03864},
file = {:Users/gandalf/Library/Mobile Documents/com{\~{}}apple{\~{}}CloudDocs/-Sync/Mendeley/Salimans et al/Salimans et al. - 2017 - Evolution Strategies as a Scalable Alternative to Reinforcement Learning.pdf:pdf},
pages = {1--13},
title = {{Evolution Strategies as a Scalable Alternative to Reinforcement Learning}},
url = {http://arxiv.org/abs/1703.03864},
year = {2017}
}
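
The two entries above can be cited from a LaTeX document using the standard BibTeX workflow. The following minimal sketch assumes the gist content is saved as references.bib next to the .tex file; the filename and the surrounding document are illustrative and not part of the exported file itself.

\documentclass{article}
\begin{document}
% Cite the entries by the keys defined in the .bib file above
Deep neuroevolution \cite{Such2017} and evolution strategies \cite{Salimans2017}
have been proposed as gradient-free alternatives for deep reinforcement learning.
\bibliographystyle{plain}
% Assumes the exported gist content was saved as references.bib
\bibliography{references}
\end{document}

Compiling with pdflatex, then bibtex, then pdflatex twice resolves the citation keys and builds the reference list.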