Skip to content

Instantly share code, notes, and snippets.

@brlauuu
Created March 1, 2017 09:42
Show Gist options
  • Save brlauuu/08288361ccb5a28968e16f6f4e33de68 to your computer and use it in GitHub Desktop.
Save brlauuu/08288361ccb5a28968e16f6f4e33de68 to your computer and use it in GitHub Desktop.
#include "offline_heuristic.h"
#include "thts.h"
#include "utils/string_utils.h"
#include "utils/system_utils.h"
#include "utils/math_utils.h"
#include <fstream>
#include <iostream>
#include <sstream>
/******************************************************************
Offline Heuristic Creation
******************************************************************/
OfflineHeuristic* OfflineHeuristic::fromString(std::string& desc, THTS* thts) {
StringUtils::trim(desc);
assert(desc[0] == '[' && desc[desc.size() - 1] == ']');
StringUtils::removeFirstAndLastCharacter(desc);
StringUtils::trim(desc);
OfflineHeuristic* result = nullptr;
if (desc.find("GD") == 0) {
desc = desc.substr(2, desc.size());
result = new GradualDescent(thts);
} else {
SystemUtils::abort("Unknown Offline Heuristic: " + desc);
}
assert(result);
StringUtils::trim(desc);
while (!desc.empty()) {
std::string param;
std::string value;
StringUtils::nextParamValuePair(desc, param, value);
if (!result->setValueFromString(param, value)) {
SystemUtils::abort("Unused parameter value pair: " + param + " / " +
value);
}
}
return result;
}
// Printing offline heuristic
void OfflineHeuristic::print(std::ostream& out, std::string const& indent) {
out << "#### " << name << " ####" << std::endl;
out << "# Number of actions" << std::endl;
out << heuristic.size() << std::endl;
for(auto& action : heuristic) {
out << "# Action: " << std::endl;
out << action.first << std::endl;
// Output values for 0 feature
std::string pom = indent + "0:" + "[";
for(auto& coef : action.second) {
std::ostringstream temp;
temp << coef[0];
pom += temp.str() + ",";
}
pom = pom.substr(0, pom.size() - 1);
pom += "]\n";
out << pom;
// Output values for 1 feature
pom = indent + "1:" + "[";
for(auto& coef : action.second) {
std::ostringstream temp;
temp << coef[1];
pom += temp.str() + ",";
}
pom = pom.substr(0, pom.size() - 1);
pom += "]\n";
out << pom;
out << "########" << std::endl;
}
}
void OfflineHeuristic::parseHeuristicFromFile(std::string inputFile) {
std::string heuristicDesc;
if (!SystemUtils::readFile(inputFile, heuristicDesc, "#")) {
SystemUtils::abort("Error: Unable to read problem file: " +
inputFile);
}
std::stringstream desc(heuristicDesc);
// First line is number of actions
// Then follows every action with number of states per action and then it's states and corresponding vector of coefficients i.e.
// action_1
// coefficients_feature_0
// coefficients_feature_1
int numActions;
desc >> numActions;
for(unsigned i = 0; i < numActions; i++) {
std::string action;
desc >> action;
StringUtils::trim(action);
// TODO: Write rule
if (heuristic.count(action)) {
SystemUtils::abort("Action " + action + " was already loaded. See rules for writing offlineHeuristic file.");
}
// Parse 0 coefs
std::string coefsString;
desc >> coefsString;
StringUtils::trim(coefsString);
std::vector<double> pom_coefs;
parseCoefficientsFromString(coefsString, pom_coefs);
std::vector<std::vector<double>> coefs(pom_coefs.size(), std::vector<double>(2, 1.0));
for (unsigned i = 0; i < pom_coefs.size(); i++) {
coefs[i][0] = pom_coefs[i];
}
// Parse 1 coefs
desc >> coefsString;
StringUtils::trim(coefsString);
pom_coefs.clear();
parseCoefficientsFromString(coefsString, pom_coefs);
for (unsigned i = 0; i < pom_coefs.size(); i++) {
coefs[i][1] = pom_coefs[i];
}
// Store coefficients
heuristic[action] = coefs;
}
}
void OfflineHeuristic::parseCoefficientsFromString(std::string coefsString, std::vector<double>& coefs) {
coefsString = coefsString.substr(coefsString.find(':') + 1);
StringUtils::removeFirstAndLastCharacter(coefsString);
std::vector<std::string> res;
StringUtils::split(coefsString, res, ",");
coefs.clear();
coefs.resize(res.size());
for(unsigned i = 0; i < res.size(); i++) {
coefs[i] = atof(res[i].c_str());
}
}
void OfflineHeuristic::parseStateToValues(std::string stateString) {
if (stateValues.count(stateString)) {
return;
}
std::string pom = stateString;
pom.erase(std::remove(pom.begin(), pom.end(), '|'), pom.end());
std::vector<std::string> res;
StringUtils::split(pom, res, "_");
std::vector<double> values;
for(auto ss : res) {
values.push_back(atof(ss.c_str()));
}
stateValues[stateString] = values;
}
void OfflineHeuristic::learn(State state, ActionState actionState,
double const& reward) {
if (!learnHeuristic) {
return;
}
std::string stateString = state.toString();
StringUtils::trim(stateString);
std::string actionStateString = actionState.toString();
StringUtils::trim(actionStateString);
std::replace(stateString.begin(), stateString.end(), ' ', '_');
// Store in map called 'heuristic' every action,
// and for every action, there is a vector of vectors which contains
// the value of coefficients that correspond to facts (0 or 1)
// Use the reward to learn the coefficients
parseStateToValues(stateString);
if (heuristic.count(actionStateString)) {
SystemUtils::log("\tUpdate for action: " + actionStateString, reward);
updateCoefficients(stateString, heuristic[actionStateString], reward);
} else {
std::vector<std::vector<double>> coefs(state.deterministicStateFluents.size() + state.probabilisticStateFluents.size(), std::vector<double>(2, 0.0));
heuristic[actionStateString] = coefs;
updateCoefficients(stateString, heuristic[actionStateString], reward);
}
}
double OfflineHeuristic::calculateSumOfMultipliers(std::vector<std::vector<double>> coefficients, std::vector<double> stateValues) {
double res = 0.0;
assert(coefficients.size() == stateValues.size());
for(unsigned i = 0; i < coefficients.size(); i++) {
res += coefficients[i][(int)stateValues[i]];
}
return res;
}
/******************************************************************
Gradual Descent
******************************************************************/
void GradualDescent::updateCoefficients(std::string state, std::vector<std::vector<double>>& coefs, double reward) {
// Initial values of coefficients are given by initial step and are all 0.0
// The method that is being performed here is done in a floowing way:
// For every (feature, weight) pair - feature being the value of deterministic or nondetermistic state fluent and weight being the value of coefficient corresponding to that fluent - the values of weight are modified so that in sum they approach to the reward value
// Every weight is actually a pair for each feature - feature is described as a fact and takes values from the set {0, 1} and for every value there is a coefficient
// How fast the method is approaching to the solution depends on the step size, given by user
// Depending on the sign of the fluent, step size is either added to or subtracted from the weight accross all weights
// The algorithm stops in one of two cases:
// - when precision of EPS = 0.000000001 is satisfied between the given reward and the one calculated from current (feature, weights) combination
// - when two subsequent calculated values are equal to the precision of EPS
// In case the state is being visited for the fist time, we need to parse it so that we are able to retrieve fast the values
parseStateToValues(state);
std::vector<double> stateVals = stateValues[state];
double calculatedReward = calculateSumOfMultipliers(coefs, stateVals);
double difference = std::fabs(reward - calculatedReward);
while(!MathUtils::doubleIsEqual(reward, calculatedReward)) {
for(unsigned i = 0; i < coefs.size(); i++) {
// We take the difference and multiply it with the step -> first change is the biggest and the closer we are to the solution, the smaller the move is
// stateVals[i] can be either 0 or 1 which will modify the right coefficient
if (type == GradualDescent::DEMOCRACY) {
SystemUtils::log("\t\tOld value: ", coefs[i][(int)stateVals[i]], i );
coefs[i][(int)stateVals[i]] += MathUtils::sign(reward - calculatedReward) * (difference / coefs.size() * step);
SystemUtils::log("\t\tNew value: ", coefs[i][(int)stateVals[i]], (int)stateVals[i]);
} else {
SystemUtils::abort("Descent type not supported yet.");
}
}
double tempReward = calculateSumOfMultipliers(coefs, stateVals);
double tempDifference = std::fabs(reward - calculatedReward);
// If we reached satisfying solution or the newest change is worse than previous, we break the loop (after fixing in later case)
if (MathUtils::doubleIsEqual(tempReward, calculatedReward) || MathUtils::doubleIsGreaterOrEqual(tempDifference, difference)) {
// Go step back - return to the best case
for(unsigned i = 0; i < coefs.size(); i++) {
if (type == GradualDescent::DEMOCRACY) {
coefs[i][(int)stateVals[i]] -= MathUtils::sign(reward - calculatedReward) * (difference / coefs.size() * step);
} else {
SystemUtils::abort("Descent type not supported yet.");
}
}
break;
}
calculatedReward = tempReward;
difference = tempDifference;
}
}
bool GradualDescent::setValueFromString(std::string& param,
std::string& value) {
if (param == "-learn") {
// Enable learning of the heuristic
if (value == "ON") {
setLearnHeuristic(true);
} else if (value == "OFF") {
setLearnHeuristic(false);
}
return true;
} else if (param == "-loadFrom") {
// Load existing heuristic
std::ifstream f(value.c_str());
if (!f.good()) {
SystemUtils::abort("File " + value + " doesn't exist.");
return false;
}
setHeuristicFileLocationToLoad(value.c_str());
parseHeuristicFromFile(value);
return true;
} else if (param == "-step") {
// Set the step of the learning rate
setStep(atof(value.c_str()));
return true;
} else if (param == "-storeTo") {
// Set where the offline heuristic will be stored
setHeuristicFileLocationToStore(value.c_str());
return true;
} else if (param == "-type") {
// Type of Descent
if (value == "DEMOCRACY") {
setType(GradualDescent::DescentType::DEMOCRACY);
} else {
SystemUtils::abort("Unknown descent type: " + value);
}
return true;
}
return false;
}
std::string GradualDescent::getDescentType() {
if (type == GradualDescent::DescentType::ALL_AS_EQUAL) {
return "ALL_AS_EQUAL";
} else if (type == GradualDescent::DescentType::BIAS) {
return "BIAS";
} else if (type == GradualDescent::DescentType::DEMOCRACY) {
return "DEMOCRACY";
} else {
SystemUtils::abort("Descent type is not set.");
return "";
}
}
void GradualDescent::setStep(double _step) {
if (MathUtils::doubleIsGreater(_step, 1) || MathUtils::doubleIsSmaller(_step, 0)) {
SystemUtils::abort("Step in 'GradualDescent' takes value between 0 and 1.'");
}
step = _step;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment