Last active
March 11, 2017 22:23
-
-
Save kris-singh/54cc569b984e3c3f3bf85f7f58fdfc87 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cmath>
#include <time.h>
#include <stdlib.h>
#include <random>

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/layer/leaky_relu.hpp>
#include <mlpack/methods/ann/visitor/reset_visitor.hpp>
#include <mlpack/methods/ann/visitor/backward_visitor.hpp>
#include <mlpack/methods/ann/visitor/parameters_visitor.hpp>

#include "environment.hpp"

#define print(x) for(auto i: x) std::cout << i << std::endl;

using namespace mlpack;
using namespace mlpack::ann;
using namespace mlpack::optimization;
using namespace gym;
/*
std::vector<double> discountReward(std::vector<double>& reward, double gamma)
{
  std::vector<double> discountReward(reward.size());
  double runningAdd = 0;
  for (int i = reward.size() - 1; i >= 0; i--)
  {
    // element wise multiplication
    runningAdd = runningAdd * gamma + reward[i];
    discountReward[i] = runningAdd;
  }
  double mean = 0;
  double E = 0;
  for (auto i : discountReward)
    mean += i;
  mean = mean / static_cast<double>(reward.size());
  for (auto i : discountReward)
    E += pow((i - mean), 2);
  double stdev = sqrt(E / static_cast<double>(reward.size()));
  // normalise
  for (size_t i = 0; i < discountReward.size(); i++)
    discountReward[i] -= mean;
  for (size_t i = 0; i < discountReward.size(); i++)
    discountReward[i] /= stdev;
  return discountReward;
}

template<typename T>
std::pair<std::vector<arma::vec>, std::vector<arma::mat>>
backwardPolicy(T& model, std::vector<double>& discountReward,
               std::vector<arma::vec>& epsHidden,
               std::vector<arma::vec>& epsInput)
{
  std::vector<arma::vec> dw2(epsHidden.size());
  std::vector<arma::vec> dh(epsHidden.size());
  std::vector<arma::mat> dw1(epsHidden.size());
  arma::mat weights;
  for (size_t i = 0; i < epsHidden.size(); i++)
  {
    dw2[i] = epsHidden[i] * discountReward[i];
  }
  // check if this is a matrix if we have multiple outputs
  boost::apply_visitor(ParametersVisitor(std::move(weights)), model.Model()[model.Model().size() - 2]);
  std::cout << weights << std::endl;
  std::cout << "-------------" << discountReward[0] << std::endl;
  for (size_t i = 0; i < discountReward.size(); i++)
  {
    dh[i] = weights * discountReward[i];
  }
  for (size_t i = 0; i < epsHidden.size(); i++)
    for (size_t j = 0; j < epsHidden[i].n_rows; j++)
      if (epsHidden[i][j] <= 0)
      {
        dh[i][j] = 0;
      }
  for (size_t i = 0; i < dh.size(); i++)
  {
    dw1[i] = dh[0] * epsInput[0].t();
  }
  return std::make_pair(dw2, dw1);
}
*/
template<typename T> | |
void GetAction(Environment& env, | |
T& model, | |
arma::mat& data, | |
std::vector<arma::vec>& epsHidden, | |
std::vector<arma::vec>& epsInput, | |
std::vector<double>& epsLabel, | |
std::vector<double>& epsReward, | |
std::vector<double>& epsGrad) | |
{ | |
static double rewardSum = 0; | |
arma::mat action; | |
double yLabel; | |
arma::mat predictionTemp; | |
//for each episode get all the input states | |
epsInput.push_back(data); | |
//forward pass | |
model.Predict(data, predictionTemp); | |
std::cout << "-----" << predictionTemp << std::endl; | |
//log all the hidden values(assuming 1 hidden layer) | |
arma::vec outputParameter = boost::apply_visitor(OutputParameterVisitor(), model.Model()[0]); | |
epsHidden.push_back(outputParameter); | |
//select action to perform | |
srand(time(NULL)); | |
double r = ((double)rand() / (RAND_MAX)) + 1; | |
double action_double = r>arma::as_scalar(predictionTemp) ? 1:0; | |
if (action_double==0) | |
action = arma::zeros(1); | |
else | |
action = arma::ones(1); | |
//take the action | |
env.step(action); | |
//fake label | |
yLabel = arma::as_scalar(action)==0?1:0; | |
//log all the output values | |
epsLabel.push_back(yLabel); | |
//log all the action gradients | |
// assuming loss function is mean squared error | |
// Todo change this to add Gradient() | |
double grad = (yLabel - arma::as_scalar(action)); | |
rewardSum += env.reward; | |
epsGrad.push_back(grad); | |
epsReward.push_back(rewardSum); | |
} | |
int main(int argc, char* argv[]) | |
{ | |
const std::string environment = "CartPole-v0"; | |
const std::string host = "127.0.0.1"; | |
const std::string port = "4040"; | |
FFN<MeanSquaredError<>,RandomInitialization> model; | |
std::vector<arma::vec> epsHidden, epsInput; | |
std::vector<double> epsReward, epsLabel, epsGrad; | |
size_t hiddenLayerSize = 10; | |
size_t numActions = 2; | |
double totalReward = 0; | |
size_t totalSteps = 0; | |
double learning_rate = 0.4; | |
double discount_rate = 0.1; | |
Environment env(host, port, environment); | |
arma::mat observation = env.reset(); | |
Parser P(); | |
model.Add<Linear<> >(observation.n_rows, hiddenLayerSize); | |
model.Add<LeakyReLU<> >(0); | |
model.Add<Linear<> >(hiddenLayerSize, 1); | |
model.Add<LogSoftMax<>>(); | |
int render = 0; | |
while(1==1) | |
{ | |
if (render) | |
env.render(); | |
GetAction<FFN<MeanSquaredError<>,RandomInitialization>>(env, model, observation, | |
epsHidden, | |
epsInput, | |
epsLabel, | |
epsReward, | |
epsGrad); | |
if (env.done) | |
break; | |
} | |
} |
SET(CMAKE_CXX_FLAGS "-std=c++11 -g -O3 -fPIC -L/home/kris/Desktop/GsoC2k17/mlpack/build/lib/ -lmlpack -ffast-math -I/home/kris/Desktop/GsoC2k17/mlpack/build/include/ ${CMAKE_CXX_FLAGS}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
CMakeFiles/policyexample.dir/policyexample.cpp.o: In function
double mlpack::ann::NegativeLogLikelihood<arma::Mat<double>, arma::Mat<double> >::Forward<double>(arma::Mat<double> const&&, arma::Mat<double>&&)': /usr/local/include/mlpack/methods/ann/layer/negative_log_likelihood_impl.hpp:37: undefined reference to
mlpack::Log::Assert(bool, std::string const&)'CMakeFiles/policyexample.dir/policyexample.cpp.o: In function
double mlpack::ann::VRClassReward<arma::Mat<double>, arma::Mat<double> >::Forward<double>(arma::Mat<double> const&&, arma::Mat<double> const&&)': /usr/local/include/mlpack/methods/ann/layer/vr_class_reward_impl.hpp:39: undefined reference to
mlpack::Log::Assert(bool, std::string const&)'