-
-
Save kris-singh/54cc569b984e3c3f3bf85f7f58fdfc87 to your computer and use it in GitHub Desktop.
#include <cmath>
#include <time.h>
#include <stdlib.h>

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/layer/leaky_relu.hpp>
#include <mlpack/methods/ann/visitor/backward_visitor.hpp>
#include <mlpack/methods/ann/visitor/output_parameter_visitor.hpp>
#include <mlpack/methods/ann/visitor/parameters_visitor.hpp>
#include <mlpack/methods/ann/visitor/reset_visitor.hpp>

#include "environment.hpp"
#define print(x) for(auto i: x) std::cout << i << std::endl; | |
using namespace mlpack; | |
using namespace mlpack::ann; | |
using namespace mlpack::optimization; | |
using namespace gym; | |
/* | |
std::vector<double> discountReward(std::vector<double>& reward, double gamma) | |
{ | |
std::vector<double> discountReward(reward.size()); | |
double runningAdd = 0; | |
for(int i = reward.size() - 1; i >= 0; i--) | |
{ | |
// element wise multiplication | |
runningAdd = runningAdd * gamma + reward[i]; | |
discountReward[i] = runningAdd; | |
} | |
double mean = 0; | |
double E = 0; | |
for(auto i: discountReward) | |
mean += i; | |
mean = mean / static_cast<double>(reward.size()); | |
for(auto i: discountReward) | |
E += pow((i - mean), 2); | |
double stdev = sqrt(E / static_cast<double>(reward.size())); | |
//normalise | |
for(size_t i = 0; i < discountReward.size(); i++) | |
discountReward[i] -= mean; | |
for(size_t i = 0; i < discountReward.size(); i++) | |
discountReward[i] /= stdev; | |
return discountReward; | |
} | |
template<typename T> | |
std::pair<std::vector<arma::vec>, std::vector<arma::mat>> | |
backwardPolicy(T& model, std::vector<double>& discountReward, | |
std::vector<arma::vec>& epsHidden, | |
std::vector<arma::vec>& epsInput) | |
{ | |
std::vector<arma::vec> dw2(epsHidden.size()); | |
std::vector<arma::vec> dh(epsHidden.size()); | |
std::vector<arma::mat> dw1(epsHidden.size()); | |
arma::mat weights; | |
for(size_t i = 0; i < epsHidden.size(); i++) | |
{ | |
dw2[i] = epsHidden[i] * discountReward[i]; | |
} | |
// check if this is a matrix if we have multiple outputs | |
boost::apply_visitor(ParametersVisitor(std::move(weights)), model.Model()[model.Model().size() - 2]); | |
std::cout << weights << std::endl; | |
std::cout <<"-------------" <<discountReward[0] << std::endl; | |
for(size_t i = 0; i < discountReward.size(); i++) | |
{ | |
dh[i] = weights * discountReward[i]; | |
} | |
for(size_t i = 0; i < epsHidden.size(); i++) | |
for(size_t j = 0; j < epsHidden[i].n_rows; j++) | |
if(epsHidden[i][j] <= 0) | |
{ | |
dh[i][j] = 0; | |
} | |
for(size_t i = 0; i < dh.size(); i++) | |
{ | |
dw1[i] = dh[0] * epsInput[0].t(); | |
} | |
return std::make_pair(dw2, dw1); | |
} | |
*/ | |
template<typename T> | |
void GetAction(Environment& env, | |
T& model, | |
arma::mat& data, | |
std::vector<arma::vec>& epsHidden, | |
std::vector<arma::vec>& epsInput, | |
std::vector<double>& epsLabel, | |
std::vector<double>& epsReward, | |
std::vector<double>& epsGrad) | |
{ | |
static double rewardSum = 0; | |
arma::mat action; | |
double yLabel; | |
arma::mat predictionTemp; | |
//for each episode get all the input states | |
epsInput.push_back(data); | |
//forward pass | |
model.Predict(data, predictionTemp); | |
std::cout << "-----" << predictionTemp << std::endl; | |
//log all the hidden values(assuming 1 hidden layer) | |
arma::vec outputParameter = boost::apply_visitor(OutputParameterVisitor(), model.Model()[0]); | |
epsHidden.push_back(outputParameter); | |
//select action to perform | |
srand(time(NULL)); | |
double r = ((double)rand() / (RAND_MAX)) + 1; | |
double action_double = r>arma::as_scalar(predictionTemp) ? 1:0; | |
if (action_double==0) | |
action = arma::zeros(1); | |
else | |
action = arma::ones(1); | |
//take the action | |
env.step(action); | |
//fake label | |
yLabel = arma::as_scalar(action)==0?1:0; | |
//log all the output values | |
epsLabel.push_back(yLabel); | |
//log all the action gradients | |
// assuming loss function is mean squared error | |
// Todo change this to add Gradient() | |
double grad = (yLabel - arma::as_scalar(action)); | |
rewardSum += env.reward; | |
epsGrad.push_back(grad); | |
epsReward.push_back(rewardSum); | |
} | |
int main(int argc, char* argv[]) | |
{ | |
const std::string environment = "CartPole-v0"; | |
const std::string host = "127.0.0.1"; | |
const std::string port = "4040"; | |
FFN<MeanSquaredError<>,RandomInitialization> model; | |
std::vector<arma::vec> epsHidden, epsInput; | |
std::vector<double> epsReward, epsLabel, epsGrad; | |
size_t hiddenLayerSize = 10; | |
size_t numActions = 2; | |
double totalReward = 0; | |
size_t totalSteps = 0; | |
double learning_rate = 0.4; | |
double discount_rate = 0.1; | |
Environment env(host, port, environment); | |
arma::mat observation = env.reset(); | |
Parser P(); | |
model.Add<Linear<> >(observation.n_rows, hiddenLayerSize); | |
model.Add<LeakyReLU<> >(0); | |
model.Add<Linear<> >(hiddenLayerSize, 1); | |
model.Add<LogSoftMax<>>(); | |
int render = 0; | |
while(1==1) | |
{ | |
if (render) | |
env.render(); | |
GetAction<FFN<MeanSquaredError<>,RandomInitialization>>(env, model, observation, | |
epsHidden, | |
epsInput, | |
epsLabel, | |
epsReward, | |
epsGrad); | |
if (env.done) | |
break; | |
} | |
} |
Use
model.Add<LeakyReLU<> >(0);
instead of
LeakyReLU<> l(0);
model.Add(l);
The pointer form
LeakyReLU<>* l = new LeakyReLU<>(0);
model.Add(l);
also works.
Also, arma::vec GetAction(T model, arma::vec data)
creates a copy of the model, but what you would like to do is use the already trained model, so you should take a reference: arma::vec GetAction(T& model, arma::vec data)
.
Linear<>* lastLayer = new Linear<>(hiddenLayerSize, 1);
model.Add(lastLayer);
lastLayer->Gradient()
instead of
model.Add<Linear<> >(hiddenLayerSize, 1);
CMakeFiles/policyexample.dir/policyexample.cpp.o: In function double mlpack::ann::NegativeLogLikelihood<arma::Mat<double>, arma::Mat<double> >::Forward<double>(arma::Mat<double> const&&, arma::Mat<double>&&)': /usr/local/include/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp:37: undefined reference to
mlpack::Log::Assert(bool, std::string const&)'
CMakeFiles/policyexample.dir/policyexample.cpp.o: In function double mlpack::ann::VRClassReward<arma::Mat<double>, arma::Mat<double> >::Forward<double>(arma::Mat<double> const&&, arma::Mat<double> const&&)': /usr/local/include/mlpack/methods/ann/layer/vr_class_reward_impl.hpp:39: undefined reference to
mlpack::Log::Assert(bool, std::string const&)'
SET(CMAKE_CXX_FLAGS "-std=c++11 -g -O3 -fPIC -L/home/kris/Desktop/GsoC2k17/mlpack/build/lib/ -lmlpack -ffast-math -I/home/kris/Desktop/GsoC2k17/mlpack/build/include/ ${CMAKE_CXX_FLAGS}")
1) Compiling this code gives a huge error message.
2) Executing the code produces output but ends with *** Error in `./policy_learning.o': double free or corruption (!prev): 0x00000000025d1cd0 ***
Aborted (core dumped). I don't understand why.
3) model.Add<LeakyReLU<>, 0)() doesn't work; I don't understand why.