brlauuu/OfflineHeuristic.cc

## OfflineHeuristic.cc
#include "offline_heuristic.h"

#include "thts.h"

#include "utils/string_utils.h"
#include "utils/system_utils.h"
#include "utils/math_utils.h"

#include <fstream>
#include <iostream>
#include <sstream>

/******************************************************************
                     Offline Heuristic Creation
******************************************************************/

OfflineHeuristic* OfflineHeuristic::fromString(std::string& desc, THTS* thts) {
    StringUtils::trim(desc);
    assert(desc[0] == '[' && desc[desc.size() - 1] == ']');
    StringUtils::removeFirstAndLastCharacter(desc);
    StringUtils::trim(desc);

    OfflineHeuristic*  result = nullptr;

    if (desc.find("GD") == 0) {
        desc = desc.substr(2, desc.size());
        result = new GradualDescent(thts);
    } else {
        SystemUtils::abort("Unknown Offline Heuristic: " + desc);
    }

    assert(result);
    StringUtils::trim(desc);

    while (!desc.empty()) {
        std::string param;
        std::string value;
        StringUtils::nextParamValuePair(desc, param, value);

        if (!result->setValueFromString(param, value)) {
            SystemUtils::abort("Unused parameter value pair: " + param + " / " +
                               value);
        }
    }
    return result;
}

// Printing offline heuristic
void OfflineHeuristic::print(std::ostream& out, std::string const& indent) {
    out << "#### " << name << " ####" << std::endl;
    out << "# Number of actions" << std::endl;
    out << heuristic.size() << std::endl;
    for(auto& action : heuristic) {
        out << "# Action: " << std::endl;
        out << action.first << std::endl;

        // Output values for 0 feature
        std::string pom = indent + "0:" + "[";
        for(auto& coef : action.second) {
            std::ostringstream temp;
            temp << coef[0];
            pom += temp.str() + ",";
        }
        pom = pom.substr(0, pom.size() - 1);
        pom +=  "]\n";

        out << pom;

        // Output values for 1 feature
        pom = indent + "1:" + "[";
        for(auto& coef : action.second) {
            std::ostringstream temp;
            temp << coef[1];
            pom += temp.str() + ",";
        }
        pom = pom.substr(0, pom.size() - 1);
        pom +=  "]\n";

        out << pom;

        out << "########" << std::endl;
    }
}

void OfflineHeuristic::parseHeuristicFromFile(std::string inputFile) {
    std::string heuristicDesc;
    if (!SystemUtils::readFile(inputFile, heuristicDesc, "#")) {
        SystemUtils::abort("Error: Unable to read problem file: " +
                           inputFile);
    }
    std::stringstream desc(heuristicDesc);

    // First line is number of actions
    // Then follows every action with number of states per action and then it's states and corresponding vector of coefficients i.e.
    //      action_1
    //          coefficients_feature_0
    //          coefficients_feature_1

    int numActions;
    desc >> numActions;
    for(unsigned i = 0; i < numActions; i++) {
        std::string action;
        desc >> action;
        StringUtils::trim(action);


        // TODO: Write rule
        if (heuristic.count(action)) {
            SystemUtils::abort("Action " + action + " was already loaded. See rules for writing offlineHeuristic file.");
        }

        // Parse 0 coefs
        std::string coefsString;
        desc >> coefsString;
        StringUtils::trim(coefsString);
        std::vector<double> pom_coefs;
        parseCoefficientsFromString(coefsString, pom_coefs);

        std::vector<std::vector<double>> coefs(pom_coefs.size(), std::vector<double>(2, 1.0));

        for (unsigned i = 0; i < pom_coefs.size(); i++) {
            coefs[i][0] = pom_coefs[i];
        }

        // Parse 1 coefs
        desc >> coefsString;
        StringUtils::trim(coefsString);
        pom_coefs.clear();
        parseCoefficientsFromString(coefsString, pom_coefs);

        for (unsigned i = 0; i < pom_coefs.size(); i++) {
            coefs[i][1] = pom_coefs[i];
        }

        // Store coefficients
        heuristic[action] = coefs;
    }
}

void OfflineHeuristic::parseCoefficientsFromString(std::string coefsString, std::vector<double>& coefs) {
    coefsString = coefsString.substr(coefsString.find(':') + 1);
    StringUtils::removeFirstAndLastCharacter(coefsString);
    std::vector<std::string> res;
    StringUtils::split(coefsString, res, ",");

    coefs.clear();
    coefs.resize(res.size());

    for(unsigned i = 0; i < res.size(); i++) {
        coefs[i] = atof(res[i].c_str());
    }
}

void OfflineHeuristic::parseStateToValues(std::string stateString) {
    if (stateValues.count(stateString)) {
        return;
    }

    std::string pom = stateString;
    pom.erase(std::remove(pom.begin(), pom.end(), '|'), pom.end());
    std::vector<std::string> res;
    StringUtils::split(pom, res, "_");

    std::vector<double> values;
    for(auto ss : res) {
        values.push_back(atof(ss.c_str()));
    }

    stateValues[stateString] = values;
}


void OfflineHeuristic::learn(State state, ActionState actionState,
                            double const& reward) {

    if (!learnHeuristic) {
        return;
    }

    std::string stateString = state.toString();
    StringUtils::trim(stateString);
    std::string actionStateString = actionState.toString();
    StringUtils::trim(actionStateString);

    std::replace(stateString.begin(), stateString.end(), ' ', '_');

    // Store in map called 'heuristic' every action,
    // and for every action, there is a vector of vectors which contains
    // the value of coefficients that correspond to facts (0 or 1)
    // Use the reward to learn the coefficients

    parseStateToValues(stateString);

    if (heuristic.count(actionStateString)) {
        SystemUtils::log("\tUpdate for action: " + actionStateString, reward);
        updateCoefficients(stateString, heuristic[actionStateString], reward);
    } else {
        std::vector<std::vector<double>> coefs(state.deterministicStateFluents.size() + state.probabilisticStateFluents.size(), std::vector<double>(2, 0.0));
        heuristic[actionStateString] = coefs;
        updateCoefficients(stateString, heuristic[actionStateString], reward);
    }
}


double OfflineHeuristic::calculateSumOfMultipliers(std::vector<std::vector<double>> coefficients, std::vector<double> stateValues) {
    double res = 0.0;
    assert(coefficients.size() == stateValues.size());
    for(unsigned i = 0; i < coefficients.size(); i++) {
        res += coefficients[i][(int)stateValues[i]];
    }

    return res;
}

/******************************************************************
                     Gradual Descent
******************************************************************/

void GradualDescent::updateCoefficients(std::string state, std::vector<std::vector<double>>& coefs, double reward) {
    // Initial values of coefficients are given by initial step and are all 0.0
    // The method that is being performed here is done in a floowing way:
    //      For every (feature, weight) pair - feature being the value of deterministic or nondetermistic state fluent and weight being the value of coefficient corresponding to that fluent - the values of weight are modified so that in sum they approach to the reward value
    //      Every weight is actually a pair for each feature - feature is described as a fact and takes values from the set {0, 1} and for every value there is a coefficient
    //      How fast the method is approaching to the solution depends on the step size, given by user
    //      Depending on the sign of the fluent, step size is either added to or subtracted from the weight accross all weights
    //      The algorithm stops in one of two cases:
    //          - when precision of EPS = 0.000000001 is satisfied between the given reward and the one calculated from current (feature, weights) combination
    //          - when two subsequent calculated values are equal to the precision of EPS

    // In case the state is being visited for the fist time, we need to parse it so that we are able to retrieve fast the values
    parseStateToValues(state);

    std::vector<double> stateVals = stateValues[state];
    double calculatedReward = calculateSumOfMultipliers(coefs, stateVals);

    double difference = std::fabs(reward - calculatedReward);

    while(!MathUtils::doubleIsEqual(reward, calculatedReward)) {
        for(unsigned i = 0; i < coefs.size(); i++) {
            // We take the difference and multiply it with the step -> first change is the biggest and the closer we are to the solution, the smaller the move is
            // stateVals[i] can be either 0 or 1 which will modify the right coefficient

            if (type == GradualDescent::DEMOCRACY) {
                SystemUtils::log("\t\tOld value: ", coefs[i][(int)stateVals[i]], i );
                coefs[i][(int)stateVals[i]] += MathUtils::sign(reward - calculatedReward) * (difference / coefs.size() * step);
                SystemUtils::log("\t\tNew value: ", coefs[i][(int)stateVals[i]], (int)stateVals[i]);
            } else {
                SystemUtils::abort("Descent type not supported yet.");
            }

        }

        double tempReward = calculateSumOfMultipliers(coefs, stateVals);
        double tempDifference = std::fabs(reward - calculatedReward);
        // If we reached satisfying solution or the newest change is worse than previous, we break the loop  (after fixing in later case)
        if (MathUtils::doubleIsEqual(tempReward, calculatedReward) || MathUtils::doubleIsGreaterOrEqual(tempDifference, difference)) {
            // Go step back - return to the best case
            for(unsigned i = 0; i < coefs.size(); i++) {
                if (type == GradualDescent::DEMOCRACY) {
                    coefs[i][(int)stateVals[i]] -= MathUtils::sign(reward - calculatedReward) * (difference / coefs.size() * step);
                } else {
                    SystemUtils::abort("Descent type not supported yet.");
                }
            }
            break;
        }

        calculatedReward = tempReward;
        difference = tempDifference;
    }
}

bool GradualDescent::setValueFromString(std::string& param,
                                         std::string& value) {
    if (param == "-learn") {
        // Enable learning of the heuristic
        if (value == "ON") {
            setLearnHeuristic(true);
        } else if (value == "OFF") {
            setLearnHeuristic(false);
        }
        return true;
    } else if (param == "-loadFrom") {
        // Load existing heuristic
        std::ifstream f(value.c_str());
        if (!f.good()) {
            SystemUtils::abort("File " + value + " doesn't exist.");
            return false;
        }
        setHeuristicFileLocationToLoad(value.c_str());
        parseHeuristicFromFile(value);
        return true;
    } else if (param == "-step") {
        // Set the step of the learning rate
        setStep(atof(value.c_str()));
        return true;
    } else if (param == "-storeTo") {
        // Set where the offline heuristic will be stored
        setHeuristicFileLocationToStore(value.c_str());
        return true;
    } else if (param == "-type") {
        // Type of Descent
        if (value == "DEMOCRACY") {
            setType(GradualDescent::DescentType::DEMOCRACY);
        } else {
            SystemUtils::abort("Unknown descent type: " + value);
        }
        return true;
    }

    return false;
}

std::string GradualDescent::getDescentType() {
    if (type == GradualDescent::DescentType::ALL_AS_EQUAL) {
        return "ALL_AS_EQUAL";
    } else if (type == GradualDescent::DescentType::BIAS) {
        return "BIAS";
    } else if (type == GradualDescent::DescentType::DEMOCRACY) {
        return "DEMOCRACY";
    } else {
        SystemUtils::abort("Descent type is not set.");
        return "";
    }
}


void GradualDescent::setStep(double _step) {
    if (MathUtils::doubleIsGreater(_step, 1) || MathUtils::doubleIsSmaller(_step, 0)) {
        SystemUtils::abort("Step in 'GradualDescent' takes value between 0 and 1.'");
    }

    step = _step;
}
	#include "offline_heuristic.h"

	#include "thts.h"

	#include "utils/string_utils.h"
	#include "utils/system_utils.h"
	#include "utils/math_utils.h"

	#include <fstream>
	#include <iostream>
	#include <sstream>

	/******************************************************************
	Offline Heuristic Creation
	******************************************************************/

	OfflineHeuristic* OfflineHeuristic::fromString(std::string& desc, THTS* thts) {
	StringUtils::trim(desc);
	assert(desc[0] == '[' && desc[desc.size() - 1] == ']');
	StringUtils::removeFirstAndLastCharacter(desc);
	StringUtils::trim(desc);

	OfflineHeuristic* result = nullptr;

	if (desc.find("GD") == 0) {
	desc = desc.substr(2, desc.size());
	result = new GradualDescent(thts);
	} else {
	SystemUtils::abort("Unknown Offline Heuristic: " + desc);
	}

	assert(result);
	StringUtils::trim(desc);

	while (!desc.empty()) {
	std::string param;
	std::string value;
	StringUtils::nextParamValuePair(desc, param, value);

	if (!result->setValueFromString(param, value)) {
	SystemUtils::abort("Unused parameter value pair: " + param + " / " +
	value);
	}
	}
	return result;
	}

	// Printing offline heuristic
	void OfflineHeuristic::print(std::ostream& out, std::string const& indent) {
	out << "#### " << name << " ####" << std::endl;
	out << "# Number of actions" << std::endl;
	out << heuristic.size() << std::endl;
	for(auto& action : heuristic) {
	out << "# Action: " << std::endl;
	out << action.first << std::endl;

	// Output values for 0 feature
	std::string pom = indent + "0:" + "[";
	for(auto& coef : action.second) {
	std::ostringstream temp;
	temp << coef[0];
	pom += temp.str() + ",";
	}
	pom = pom.substr(0, pom.size() - 1);
	pom += "]\n";

	out << pom;

	// Output values for 1 feature
	pom = indent + "1:" + "[";
	for(auto& coef : action.second) {
	std::ostringstream temp;
	temp << coef[1];
	pom += temp.str() + ",";
	}
	pom = pom.substr(0, pom.size() - 1);
	pom += "]\n";

	out << pom;

	out << "########" << std::endl;
	}
	}

	void OfflineHeuristic::parseHeuristicFromFile(std::string inputFile) {
	std::string heuristicDesc;
	if (!SystemUtils::readFile(inputFile, heuristicDesc, "#")) {
	SystemUtils::abort("Error: Unable to read problem file: " +
	inputFile);
	}
	std::stringstream desc(heuristicDesc);

	// First line is number of actions
	// Then follows every action with number of states per action and then it's states and corresponding vector of coefficients i.e.
	// action_1
	// coefficients_feature_0
	// coefficients_feature_1

	int numActions;
	desc >> numActions;
	for(unsigned i = 0; i < numActions; i++) {
	std::string action;
	desc >> action;
	StringUtils::trim(action);


	// TODO: Write rule
	if (heuristic.count(action)) {
	SystemUtils::abort("Action " + action + " was already loaded. See rules for writing offlineHeuristic file.");
	}

	// Parse 0 coefs
	std::string coefsString;
	desc >> coefsString;
	StringUtils::trim(coefsString);
	std::vector<double> pom_coefs;
	parseCoefficientsFromString(coefsString, pom_coefs);

	std::vector<std::vector<double>> coefs(pom_coefs.size(), std::vector<double>(2, 1.0));

	for (unsigned i = 0; i < pom_coefs.size(); i++) {
	coefs[i][0] = pom_coefs[i];
	}

	// Parse 1 coefs
	desc >> coefsString;
	StringUtils::trim(coefsString);
	pom_coefs.clear();
	parseCoefficientsFromString(coefsString, pom_coefs);

	for (unsigned i = 0; i < pom_coefs.size(); i++) {
	coefs[i][1] = pom_coefs[i];
	}

	// Store coefficients
	heuristic[action] = coefs;
	}
	}

	void OfflineHeuristic::parseCoefficientsFromString(std::string coefsString, std::vector<double>& coefs) {
	coefsString = coefsString.substr(coefsString.find(':') + 1);
	StringUtils::removeFirstAndLastCharacter(coefsString);
	std::vector<std::string> res;
	StringUtils::split(coefsString, res, ",");

	coefs.clear();
	coefs.resize(res.size());

	for(unsigned i = 0; i < res.size(); i++) {
	coefs[i] = atof(res[i].c_str());
	}
	}

	void OfflineHeuristic::parseStateToValues(std::string stateString) {
	if (stateValues.count(stateString)) {
	return;
	}

	std::string pom = stateString;
	pom.erase(std::remove(pom.begin(), pom.end(), '\|'), pom.end());
	std::vector<std::string> res;
	StringUtils::split(pom, res, "_");

	std::vector<double> values;
	for(auto ss : res) {
	values.push_back(atof(ss.c_str()));
	}

	stateValues[stateString] = values;
	}


	void OfflineHeuristic::learn(State state, ActionState actionState,
	double const& reward) {

	if (!learnHeuristic) {
	return;
	}

	std::string stateString = state.toString();
	StringUtils::trim(stateString);
	std::string actionStateString = actionState.toString();
	StringUtils::trim(actionStateString);

	std::replace(stateString.begin(), stateString.end(), ' ', '_');

	// Store in map called 'heuristic' every action,
	// and for every action, there is a vector of vectors which contains
	// the value of coefficients that correspond to facts (0 or 1)
	// Use the reward to learn the coefficients

	parseStateToValues(stateString);

	if (heuristic.count(actionStateString)) {
	SystemUtils::log("\tUpdate for action: " + actionStateString, reward);
	updateCoefficients(stateString, heuristic[actionStateString], reward);
	} else {
	std::vector<std::vector<double>> coefs(state.deterministicStateFluents.size() + state.probabilisticStateFluents.size(), std::vector<double>(2, 0.0));
	heuristic[actionStateString] = coefs;
	updateCoefficients(stateString, heuristic[actionStateString], reward);
	}
	}


	double OfflineHeuristic::calculateSumOfMultipliers(std::vector<std::vector<double>> coefficients, std::vector<double> stateValues) {
	double res = 0.0;
	assert(coefficients.size() == stateValues.size());
	for(unsigned i = 0; i < coefficients.size(); i++) {
	res += coefficients[i][(int)stateValues[i]];
	}

	return res;
	}

	/******************************************************************
	Gradual Descent
	******************************************************************/

	void GradualDescent::updateCoefficients(std::string state, std::vector<std::vector<double>>& coefs, double reward) {
	// Initial values of coefficients are given by initial step and are all 0.0
	// The method that is being performed here is done in a floowing way:
	// For every (feature, weight) pair - feature being the value of deterministic or nondetermistic state fluent and weight being the value of coefficient corresponding to that fluent - the values of weight are modified so that in sum they approach to the reward value
	// Every weight is actually a pair for each feature - feature is described as a fact and takes values from the set {0, 1} and for every value there is a coefficient
	// How fast the method is approaching to the solution depends on the step size, given by user
	// Depending on the sign of the fluent, step size is either added to or subtracted from the weight accross all weights
	// The algorithm stops in one of two cases:
	// - when precision of EPS = 0.000000001 is satisfied between the given reward and the one calculated from current (feature, weights) combination
	// - when two subsequent calculated values are equal to the precision of EPS

	// In case the state is being visited for the fist time, we need to parse it so that we are able to retrieve fast the values
	parseStateToValues(state);

	std::vector<double> stateVals = stateValues[state];
	double calculatedReward = calculateSumOfMultipliers(coefs, stateVals);

	double difference = std::fabs(reward - calculatedReward);

	while(!MathUtils::doubleIsEqual(reward, calculatedReward)) {
	for(unsigned i = 0; i < coefs.size(); i++) {
	// We take the difference and multiply it with the step -> first change is the biggest and the closer we are to the solution, the smaller the move is
	// stateVals[i] can be either 0 or 1 which will modify the right coefficient

	if (type == GradualDescent::DEMOCRACY) {
	SystemUtils::log("\t\tOld value: ", coefs[i][(int)stateVals[i]], i );
	coefs[i][(int)stateVals[i]] += MathUtils::sign(reward - calculatedReward) * (difference / coefs.size() * step);
	SystemUtils::log("\t\tNew value: ", coefs[i][(int)stateVals[i]], (int)stateVals[i]);
	} else {
	SystemUtils::abort("Descent type not supported yet.");
	}

	}

	double tempReward = calculateSumOfMultipliers(coefs, stateVals);
	double tempDifference = std::fabs(reward - calculatedReward);
	// If we reached satisfying solution or the newest change is worse than previous, we break the loop (after fixing in later case)
	if (MathUtils::doubleIsEqual(tempReward, calculatedReward) \|\| MathUtils::doubleIsGreaterOrEqual(tempDifference, difference)) {
	// Go step back - return to the best case
	for(unsigned i = 0; i < coefs.size(); i++) {
	if (type == GradualDescent::DEMOCRACY) {
	coefs[i][(int)stateVals[i]] -= MathUtils::sign(reward - calculatedReward) * (difference / coefs.size() * step);
	} else {
	SystemUtils::abort("Descent type not supported yet.");
	}
	}
	break;
	}

	calculatedReward = tempReward;
	difference = tempDifference;
	}
	}

	bool GradualDescent::setValueFromString(std::string& param,
	std::string& value) {
	if (param == "-learn") {
	// Enable learning of the heuristic
	if (value == "ON") {
	setLearnHeuristic(true);
	} else if (value == "OFF") {
	setLearnHeuristic(false);
	}
	return true;
	} else if (param == "-loadFrom") {
	// Load existing heuristic
	std::ifstream f(value.c_str());
	if (!f.good()) {
	SystemUtils::abort("File " + value + " doesn't exist.");
	return false;
	}
	setHeuristicFileLocationToLoad(value.c_str());
	parseHeuristicFromFile(value);
	return true;
	} else if (param == "-step") {
	// Set the step of the learning rate
	setStep(atof(value.c_str()));
	return true;
	} else if (param == "-storeTo") {
	// Set where the offline heuristic will be stored
	setHeuristicFileLocationToStore(value.c_str());
	return true;
	} else if (param == "-type") {
	// Type of Descent
	if (value == "DEMOCRACY") {
	setType(GradualDescent::DescentType::DEMOCRACY);
	} else {
	SystemUtils::abort("Unknown descent type: " + value);
	}
	return true;
	}

	return false;
	}

	std::string GradualDescent::getDescentType() {
	if (type == GradualDescent::DescentType::ALL_AS_EQUAL) {
	return "ALL_AS_EQUAL";
	} else if (type == GradualDescent::DescentType::BIAS) {
	return "BIAS";
	} else if (type == GradualDescent::DescentType::DEMOCRACY) {
	return "DEMOCRACY";
	} else {
	SystemUtils::abort("Descent type is not set.");
	return "";
	}
	}


	void GradualDescent::setStep(double _step) {
	if (MathUtils::doubleIsGreater(_step, 1) \|\| MathUtils::doubleIsSmaller(_step, 0)) {
	SystemUtils::abort("Step in 'GradualDescent' takes value between 0 and 1.'");
	}

	step = _step;
	}