Created
March 1, 2017 09:42
-
-
Save brlauuu/08288361ccb5a28968e16f6f4e33de68 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "offline_heuristic.h" | |
#include "thts.h" | |
#include "utils/string_utils.h" | |
#include "utils/system_utils.h" | |
#include "utils/math_utils.h" | |
#include <fstream> | |
#include <iostream> | |
#include <sstream> | |
/****************************************************************** | |
Offline Heuristic Creation | |
******************************************************************/ | |
OfflineHeuristic* OfflineHeuristic::fromString(std::string& desc, THTS* thts) { | |
StringUtils::trim(desc); | |
assert(desc[0] == '[' && desc[desc.size() - 1] == ']'); | |
StringUtils::removeFirstAndLastCharacter(desc); | |
StringUtils::trim(desc); | |
OfflineHeuristic* result = nullptr; | |
if (desc.find("GD") == 0) { | |
desc = desc.substr(2, desc.size()); | |
result = new GradualDescent(thts); | |
} else { | |
SystemUtils::abort("Unknown Offline Heuristic: " + desc); | |
} | |
assert(result); | |
StringUtils::trim(desc); | |
while (!desc.empty()) { | |
std::string param; | |
std::string value; | |
StringUtils::nextParamValuePair(desc, param, value); | |
if (!result->setValueFromString(param, value)) { | |
SystemUtils::abort("Unused parameter value pair: " + param + " / " + | |
value); | |
} | |
} | |
return result; | |
} | |
// Printing offline heuristic | |
void OfflineHeuristic::print(std::ostream& out, std::string const& indent) { | |
out << "#### " << name << " ####" << std::endl; | |
out << "# Number of actions" << std::endl; | |
out << heuristic.size() << std::endl; | |
for(auto& action : heuristic) { | |
out << "# Action: " << std::endl; | |
out << action.first << std::endl; | |
// Output values for 0 feature | |
std::string pom = indent + "0:" + "["; | |
for(auto& coef : action.second) { | |
std::ostringstream temp; | |
temp << coef[0]; | |
pom += temp.str() + ","; | |
} | |
pom = pom.substr(0, pom.size() - 1); | |
pom += "]\n"; | |
out << pom; | |
// Output values for 1 feature | |
pom = indent + "1:" + "["; | |
for(auto& coef : action.second) { | |
std::ostringstream temp; | |
temp << coef[1]; | |
pom += temp.str() + ","; | |
} | |
pom = pom.substr(0, pom.size() - 1); | |
pom += "]\n"; | |
out << pom; | |
out << "########" << std::endl; | |
} | |
} | |
void OfflineHeuristic::parseHeuristicFromFile(std::string inputFile) { | |
std::string heuristicDesc; | |
if (!SystemUtils::readFile(inputFile, heuristicDesc, "#")) { | |
SystemUtils::abort("Error: Unable to read problem file: " + | |
inputFile); | |
} | |
std::stringstream desc(heuristicDesc); | |
// First line is number of actions | |
// Then follows every action with number of states per action and then it's states and corresponding vector of coefficients i.e. | |
// action_1 | |
// coefficients_feature_0 | |
// coefficients_feature_1 | |
int numActions; | |
desc >> numActions; | |
for(unsigned i = 0; i < numActions; i++) { | |
std::string action; | |
desc >> action; | |
StringUtils::trim(action); | |
// TODO: Write rule | |
if (heuristic.count(action)) { | |
SystemUtils::abort("Action " + action + " was already loaded. See rules for writing offlineHeuristic file."); | |
} | |
// Parse 0 coefs | |
std::string coefsString; | |
desc >> coefsString; | |
StringUtils::trim(coefsString); | |
std::vector<double> pom_coefs; | |
parseCoefficientsFromString(coefsString, pom_coefs); | |
std::vector<std::vector<double>> coefs(pom_coefs.size(), std::vector<double>(2, 1.0)); | |
for (unsigned i = 0; i < pom_coefs.size(); i++) { | |
coefs[i][0] = pom_coefs[i]; | |
} | |
// Parse 1 coefs | |
desc >> coefsString; | |
StringUtils::trim(coefsString); | |
pom_coefs.clear(); | |
parseCoefficientsFromString(coefsString, pom_coefs); | |
for (unsigned i = 0; i < pom_coefs.size(); i++) { | |
coefs[i][1] = pom_coefs[i]; | |
} | |
// Store coefficients | |
heuristic[action] = coefs; | |
} | |
} | |
void OfflineHeuristic::parseCoefficientsFromString(std::string coefsString, std::vector<double>& coefs) { | |
coefsString = coefsString.substr(coefsString.find(':') + 1); | |
StringUtils::removeFirstAndLastCharacter(coefsString); | |
std::vector<std::string> res; | |
StringUtils::split(coefsString, res, ","); | |
coefs.clear(); | |
coefs.resize(res.size()); | |
for(unsigned i = 0; i < res.size(); i++) { | |
coefs[i] = atof(res[i].c_str()); | |
} | |
} | |
void OfflineHeuristic::parseStateToValues(std::string stateString) { | |
if (stateValues.count(stateString)) { | |
return; | |
} | |
std::string pom = stateString; | |
pom.erase(std::remove(pom.begin(), pom.end(), '|'), pom.end()); | |
std::vector<std::string> res; | |
StringUtils::split(pom, res, "_"); | |
std::vector<double> values; | |
for(auto ss : res) { | |
values.push_back(atof(ss.c_str())); | |
} | |
stateValues[stateString] = values; | |
} | |
void OfflineHeuristic::learn(State state, ActionState actionState, | |
double const& reward) { | |
if (!learnHeuristic) { | |
return; | |
} | |
std::string stateString = state.toString(); | |
StringUtils::trim(stateString); | |
std::string actionStateString = actionState.toString(); | |
StringUtils::trim(actionStateString); | |
std::replace(stateString.begin(), stateString.end(), ' ', '_'); | |
// Store in map called 'heuristic' every action, | |
// and for every action, there is a vector of vectors which contains | |
// the value of coefficients that correspond to facts (0 or 1) | |
// Use the reward to learn the coefficients | |
parseStateToValues(stateString); | |
if (heuristic.count(actionStateString)) { | |
SystemUtils::log("\tUpdate for action: " + actionStateString, reward); | |
updateCoefficients(stateString, heuristic[actionStateString], reward); | |
} else { | |
std::vector<std::vector<double>> coefs(state.deterministicStateFluents.size() + state.probabilisticStateFluents.size(), std::vector<double>(2, 0.0)); | |
heuristic[actionStateString] = coefs; | |
updateCoefficients(stateString, heuristic[actionStateString], reward); | |
} | |
} | |
double OfflineHeuristic::calculateSumOfMultipliers(std::vector<std::vector<double>> coefficients, std::vector<double> stateValues) { | |
double res = 0.0; | |
assert(coefficients.size() == stateValues.size()); | |
for(unsigned i = 0; i < coefficients.size(); i++) { | |
res += coefficients[i][(int)stateValues[i]]; | |
} | |
return res; | |
} | |
/****************************************************************** | |
Gradual Descent | |
******************************************************************/ | |
void GradualDescent::updateCoefficients(std::string state, std::vector<std::vector<double>>& coefs, double reward) { | |
// Initial values of coefficients are given by initial step and are all 0.0 | |
// The method that is being performed here is done in a floowing way: | |
// For every (feature, weight) pair - feature being the value of deterministic or nondetermistic state fluent and weight being the value of coefficient corresponding to that fluent - the values of weight are modified so that in sum they approach to the reward value | |
// Every weight is actually a pair for each feature - feature is described as a fact and takes values from the set {0, 1} and for every value there is a coefficient | |
// How fast the method is approaching to the solution depends on the step size, given by user | |
// Depending on the sign of the fluent, step size is either added to or subtracted from the weight accross all weights | |
// The algorithm stops in one of two cases: | |
// - when precision of EPS = 0.000000001 is satisfied between the given reward and the one calculated from current (feature, weights) combination | |
// - when two subsequent calculated values are equal to the precision of EPS | |
// In case the state is being visited for the fist time, we need to parse it so that we are able to retrieve fast the values | |
parseStateToValues(state); | |
std::vector<double> stateVals = stateValues[state]; | |
double calculatedReward = calculateSumOfMultipliers(coefs, stateVals); | |
double difference = std::fabs(reward - calculatedReward); | |
while(!MathUtils::doubleIsEqual(reward, calculatedReward)) { | |
for(unsigned i = 0; i < coefs.size(); i++) { | |
// We take the difference and multiply it with the step -> first change is the biggest and the closer we are to the solution, the smaller the move is | |
// stateVals[i] can be either 0 or 1 which will modify the right coefficient | |
if (type == GradualDescent::DEMOCRACY) { | |
SystemUtils::log("\t\tOld value: ", coefs[i][(int)stateVals[i]], i ); | |
coefs[i][(int)stateVals[i]] += MathUtils::sign(reward - calculatedReward) * (difference / coefs.size() * step); | |
SystemUtils::log("\t\tNew value: ", coefs[i][(int)stateVals[i]], (int)stateVals[i]); | |
} else { | |
SystemUtils::abort("Descent type not supported yet."); | |
} | |
} | |
double tempReward = calculateSumOfMultipliers(coefs, stateVals); | |
double tempDifference = std::fabs(reward - calculatedReward); | |
// If we reached satisfying solution or the newest change is worse than previous, we break the loop (after fixing in later case) | |
if (MathUtils::doubleIsEqual(tempReward, calculatedReward) || MathUtils::doubleIsGreaterOrEqual(tempDifference, difference)) { | |
// Go step back - return to the best case | |
for(unsigned i = 0; i < coefs.size(); i++) { | |
if (type == GradualDescent::DEMOCRACY) { | |
coefs[i][(int)stateVals[i]] -= MathUtils::sign(reward - calculatedReward) * (difference / coefs.size() * step); | |
} else { | |
SystemUtils::abort("Descent type not supported yet."); | |
} | |
} | |
break; | |
} | |
calculatedReward = tempReward; | |
difference = tempDifference; | |
} | |
} | |
bool GradualDescent::setValueFromString(std::string& param, | |
std::string& value) { | |
if (param == "-learn") { | |
// Enable learning of the heuristic | |
if (value == "ON") { | |
setLearnHeuristic(true); | |
} else if (value == "OFF") { | |
setLearnHeuristic(false); | |
} | |
return true; | |
} else if (param == "-loadFrom") { | |
// Load existing heuristic | |
std::ifstream f(value.c_str()); | |
if (!f.good()) { | |
SystemUtils::abort("File " + value + " doesn't exist."); | |
return false; | |
} | |
setHeuristicFileLocationToLoad(value.c_str()); | |
parseHeuristicFromFile(value); | |
return true; | |
} else if (param == "-step") { | |
// Set the step of the learning rate | |
setStep(atof(value.c_str())); | |
return true; | |
} else if (param == "-storeTo") { | |
// Set where the offline heuristic will be stored | |
setHeuristicFileLocationToStore(value.c_str()); | |
return true; | |
} else if (param == "-type") { | |
// Type of Descent | |
if (value == "DEMOCRACY") { | |
setType(GradualDescent::DescentType::DEMOCRACY); | |
} else { | |
SystemUtils::abort("Unknown descent type: " + value); | |
} | |
return true; | |
} | |
return false; | |
} | |
std::string GradualDescent::getDescentType() { | |
if (type == GradualDescent::DescentType::ALL_AS_EQUAL) { | |
return "ALL_AS_EQUAL"; | |
} else if (type == GradualDescent::DescentType::BIAS) { | |
return "BIAS"; | |
} else if (type == GradualDescent::DescentType::DEMOCRACY) { | |
return "DEMOCRACY"; | |
} else { | |
SystemUtils::abort("Descent type is not set."); | |
return ""; | |
} | |
} | |
void GradualDescent::setStep(double _step) { | |
if (MathUtils::doubleIsGreater(_step, 1) || MathUtils::doubleIsSmaller(_step, 0)) { | |
SystemUtils::abort("Step in 'GradualDescent' takes value between 0 and 1.'"); | |
} | |
step = _step; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment