Skip to content

Instantly share code, notes, and snippets.

@kris-singh
Last active March 11, 2017 22:23
Show Gist options
  • Save kris-singh/54cc569b984e3c3f3bf85f7f58fdfc87 to your computer and use it in GitHub Desktop.
Save kris-singh/54cc569b984e3c3f3bf85f7f58fdfc87 to your computer and use it in GitHub Desktop.
#define print(x) for(auto i: x) std::cout << i << std::endl;
using namespace mlpack;
using namespace mlpack::ann;
using namespace mlpack::optimization;
using namespace gym;
/*
std::vector<double> discountReward(std::vector<double>& reward, double gamma)
{
std::vector<double> discountReward(reward.size());
double runningAdd = 0;
for(int i = reward.size() - 1; i >= 0; i--)
{
// element wise multiplication
runningAdd = runningAdd * gamma + reward[i];
discountReward[i] = runningAdd;
}
double mean = 0;
double E = 0;
for(auto i: discountReward)
mean += i;
mean = mean / static_cast<double>(reward.size());
for(auto i: discountReward)
E += pow((i - mean), 2);
double stdev = sqrt(E / static_cast<double>(reward.size()));
//normalise
for(size_t i = 0; i < discountReward.size(); i++)
discountReward[i] -= mean;
for(size_t i = 0; i < discountReward.size(); i++)
discountReward[i] /= stdev;
return discountReward;
}
template<typename T>
std::pair<std::vector<arma::vec>, std::vector<arma::mat>>
backwardPolicy(T& model, std::vector<double>& discountReward,
std::vector<arma::vec>& epsHidden,
std::vector<arma::vec>& epsInput)
{
std::vector<arma::vec> dw2(epsHidden.size());
std::vector<arma::vec> dh(epsHidden.size());
std::vector<arma::mat> dw1(epsHidden.size());
arma::mat weights;
for(size_t i = 0; i < epsHidden.size(); i++)
{
dw2[i] = epsHidden[i] * discountReward[i];
}
// check if this is a matrix if we have multiple outputs
boost::apply_visitor(ParametersVisitor(std::move(weights)), model.Model()[model.Model().size() - 2]);
std::cout << weights << std::endl;
std::cout <<"-------------" <<discountReward[0] << std::endl;
for(size_t i = 0; i < discountReward.size(); i++)
{
dh[i] = weights * discountReward[i];
}
for(size_t i = 0; i < epsHidden.size(); i++)
for(size_t j = 0; j < epsHidden[i].n_rows; j++)
if(epsHidden[i][j] <= 0)
{
dh[i][j] = 0;
}
for(size_t i = 0; i < dh.size(); i++)
{
// NOTE(review): originally used dh[0] and epsInput[0] — every dw1 entry got
// the first step's gradient; it should vary with i.
dw1[i] = dh[i] * epsInput[i].t();
}
return std::make_pair(dw2, dw1);
}
*/
/**
 * Run one environment step: forward the current observation through the
 * policy network, sample an action, execute it in the environment, and log
 * the state/hidden activations/label/gradient/reward needed for a later
 * policy-gradient update.
 *
 * @param env        Environment to step (provides reward and done flag).
 * @param model      Policy network; taken by reference so the trained
 *                   parameters are used (no copy).
 * @param data       Current observation (input to the network).
 * @param epsHidden  Out: hidden-layer activations, one entry per step.
 * @param epsInput   Out: observations, one entry per step.
 * @param epsLabel   Out: "fake" labels (opposite of the taken action).
 * @param epsReward  Out: running reward sum after each step.
 * @param epsGrad    Out: per-step output gradients.
 */
template<typename T>
void GetAction(Environment& env,
T& model,
arma::mat& data,
std::vector<arma::vec>& epsHidden,
std::vector<arma::vec>& epsInput,
std::vector<double>& epsLabel,
std::vector<double>& epsReward,
std::vector<double>& epsGrad)
{
// Cumulative reward; static so it persists across every call (whole run).
static double rewardSum = 0;
// Seed once and reuse the engine.  The original code called
// srand(time(NULL)) on every step, so all steps within the same second
// drew the identical "random" value.
static std::mt19937 rng(static_cast<unsigned>(time(NULL)));
static std::uniform_real_distribution<double> uniform(0.0, 1.0);
// Log the input state for this step.
epsInput.push_back(data);
// Forward pass through the policy network.
arma::mat predictionTemp;
model.Predict(data, predictionTemp);
std::cout << "-----" << predictionTemp << std::endl;
// Log the hidden activations (assumes exactly one hidden layer, i.e. the
// first layer's output parameter -- TODO confirm for deeper models).
arma::vec outputParameter =
    boost::apply_visitor(OutputParameterVisitor(), model.Model()[0]);
epsHidden.push_back(outputParameter);
// Sample the action.  The original draw was
// ((double)rand() / RAND_MAX) + 1, i.e. uniform on [1, 2), which made the
// comparison against the network output nearly deterministic; draw from
// [0, 1) instead.
const double r = uniform(rng);
arma::mat action =
    r > arma::as_scalar(predictionTemp) ? arma::ones(1) : arma::zeros(1);
// Take the action in the environment.
env.step(action);
// "Fake" label: the opposite of the action actually taken.
const double yLabel = arma::as_scalar(action) == 0 ? 1 : 0;
epsLabel.push_back(yLabel);
// Gradient assuming a mean-squared-error-style loss on the output.
// TODO(review): replace with the layer's Gradient() once wired up.
const double grad = yLabel - arma::as_scalar(action);
rewardSum += env.reward;
epsGrad.push_back(grad);
epsReward.push_back(rewardSum);
}
/**
 * Entry point: connect to a gym CartPole-v0 server, build a small policy
 * network (Linear -> LeakyReLU -> Linear -> LogSoftMax), and run one
 * episode, logging states/actions/rewards through GetAction().
 */
int main(int argc, char* argv[])
{
const std::string environment = "CartPole-v0";
const std::string host = "127.0.0.1";
const std::string port = "4040";
// Per-episode logs filled by GetAction().
FFN<MeanSquaredError<>, RandomInitialization> model;
std::vector<arma::vec> epsHidden, epsInput;
std::vector<double> epsReward, epsLabel, epsGrad;
const size_t hiddenLayerSize = 10;
Environment env(host, port, environment);
arma::mat observation = env.reset();
// NOTE(review): the original "Parser P();" was the most vexing parse -- it
// declared a *function* named P, not a Parser object -- and P was never
// used, so it has been removed (along with other unused locals).
model.Add<Linear<> >(observation.n_rows, hiddenLayerSize);
model.Add<LeakyReLU<> >(0);
model.Add<Linear<> >(hiddenLayerSize, 1);
// NOTE(review): LogSoftMax over a single output always yields log(1) = 0;
// a sigmoid-style output is probably intended here -- confirm.
model.Add<LogSoftMax<> >();
bool render = false;
while (true)
{
if (render)
  env.render();
// Template argument is deduced from `model`; no need to spell it out.
GetAction(env, model, observation, epsHidden, epsInput, epsLabel,
          epsReward, epsGrad);
if (env.done)
  break;
}
return 0;
}
@kris-singh
Copy link
Author

1)on compiling this code....gives a huge error message
2)on executing the code it gives output but ends with *** Error in `./policy_learning.o': double free or corruption (!prev): 0x00000000025d1cd0 ***
Aborted (core dumped). I don't understand why.
3) model.Add<LeakyReLU<>, 0)() doesn't work; I don't understand why.

@zoq
Copy link

zoq commented Mar 8, 2017

Use

model.Add<LeakyReLU<> >(0);

instead of

LeakyReLU<> l(0);
model.Add(l);
LeakyReLU<>* l = new LeakyReLU<>(0);
model.Add(l);

does also work.

@zoq
Copy link

zoq commented Mar 8, 2017

Also arma::vec GetAction(T model, arma::vec data) creates a copy of the model, but what you would like to do is use the already trained model, so you should use a reference: arma::vec GetAction(T& model, arma::vec data).

@zoq
Copy link

zoq commented Mar 9, 2017

Linear<>* lastLayer = new Linear<>(hiddenLayerSize, 1);
model.Add(lastLayer);

lastLayer->Gradient()

instead of

model.Add<Linear<> >(hiddenLayerSize, 1);

@kris-singh
Copy link
Author

CMakeFiles/policyexample.dir/policyexample.cpp.o: In function double mlpack::ann::NegativeLogLikelihood<arma::Mat<double>, arma::Mat<double> >::Forward<double>(arma::Mat<double> const&&, arma::Mat<double>&&)': /usr/local/include/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp:37: undefined reference to mlpack::Log::Assert(bool, std::string const&)'
CMakeFiles/policyexample.dir/policyexample.cpp.o: In function double mlpack::ann::VRClassReward<arma::Mat<double>, arma::Mat<double> >::Forward<double>(arma::Mat<double> const&&, arma::Mat<double> const&&)': /usr/local/include/mlpack/methods/ann/layer/vr_class_reward_impl.hpp:39: undefined reference to mlpack::Log::Assert(bool, std::string const&)'

@kris-singh
Copy link
Author

SET(CMAKE_CXX_FLAGS "-std=c++11 -g -O3 -fPIC -L/home/kris/Desktop/GsoC2k17/mlpack/build/lib/ -lmlpack -ffast-math -I/home/kris/Desktop/GsoC2k17/mlpack/build/include/ ${CMAKE_CXX_FLAGS}")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment