Skip to content

Instantly share code, notes, and snippets.

@Trion129
Last active June 20, 2019 10:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Trion129/962f962f85836688302adbf5fb782594 to your computer and use it in GitHub Desktop.
Save Trion129/962f962f85836688302adbf5fb782594 to your computer and use it in GitHub Desktop.
Evolution strategy agent that plays Space Invaders
#include <iostream>
#include <vector>
#include <string>
#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/sgd/sgd.hpp>
#include "../environment.hpp"
using namespace mlpack;
using namespace std;
using namespace arma;
using namespace gym;
class EvolutionStrategyAgent{
public:
mat model;
size_t frameW, frameH, num_actions;
EvolutionStrategyAgent(size_t frameW,
size_t frameH,
size_t num_actions)
:
frameW(frameW),
frameH(frameH),
num_actions(num_actions)
{
model = randu(num_actions, frameW * frameH * 3);
}
void Play(string environment,
string host,
string port)
{
size_t num_workers = 5;
double sigma = 1; // Rewards standard deviation
double alpha = 0.005; // Learning Rate
size_t input = frameW * frameH * 3;
mat workerRewards(num_workers, 1);
vector<mat> epsilons(num_workers);
while(1)
{
#pragma omp parallel for
for(size_t i = 0; i < num_workers; i++)
{
mat epsilon = randn(num_actions, input);
mat innerModel = model + (sigma * epsilon);
epsilons[i] = epsilon;
Environment env(host, port, environment);
env.compression(9);
// Create a folder for Agent Files
string folder("./dummy/");
folder += i + '/';
// Monitor its moves
env.monitor.start(folder, true, false);
env.reset();
env.render();
size_t totalReward = 0;
//Until the episode is complete
while(1)
{
mat maxAction;
mat action = innerModel * vectorise(env.observation);
maxAction = action.index_max();
env.step(maxAction);
totalReward += env.reward;
if (env.done)
{
break;
}
}
env.close();
workerRewards[i] = totalReward;
}
mat sumRxEpsilon = zeros(num_actions, input);
for(size_t i = 0; i < num_workers; i++){
mat stdReward = (workerRewards[i] -
mean(workerRewards)) /
stddev(workerRewards);
sumRxEpsilon += epsilons[i] * as_scalar(stdReward);
}
model = model + (alpha/(num_workers * sigma)) * sumRxEpsilon;
cout << "Worker Rewards: ";
for(double reward : workerRewards){
cout << reward << " ";
}
cout << "\n";
}
}
};
#include "EvolutionStrategy.hpp"
int main(int argc, char* argv[]){
const std::string environment = "SpaceInvaders-v0";
const std::string host = "127.0.0.1"/*"kurg.org"*/;
const std::string port = "4040";
EvolutionStrategyAgent agent(210, 160, 6);
agent.Play(environment, host, port);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment