yoavlt/hmm.hpp

## hmm.hpp
#ifndef _HMM_HPP_
#define _HMM_HPP_

#include <vector>
#include <string>
#include <list>
#include <utility>
#include <map>

template<typename T>
class Label {
public:
    Label() { }
    Label(const T& value) : m_value(value) { }
    Label(const Label<T>& rhs) : m_value(rhs.value()) { }
    bool operator=(const Label<T>& rhs) {
        m_value = rhs.m_value;
    }
public:
    T value() const { return m_value; }
private:
    T m_value;
};
template<typename T>
static bool operator==(const Label<T>& lhs, const Label<T>& rhs) {
    return lhs.value() == rhs.value();
}

template<typename T>
static bool operator<(const Label<T>& lhs, const Label<T>& rhs) {
    return lhs.value() < rhs.value();
}

template<typename T>
static bool operator>(const Label<T>& lhs, const Label<T>& rhs) {
    return lhs.value() > rhs.m_value;
}

template<typename T>
static bool operator!=(const Label<T>& lhs, const Label<T>& rhs) {
    return lhs.value() != rhs.value();
}
typedef Label<std::string> SLabel;

typedef std::map<SLabel, float> Mapf;
typedef std::map<SLabel, double> Mapd;

static inline Mapf normalize(const Mapf& probs) {
    std::vector<SLabel> keys;
    float sum = 0.0f;
    for (auto& prob : probs) {
        keys.push_back(prob.first);
        sum += prob.second;
    }
    Mapf result;
    for (auto& key : keys) {
        result.insert(std::make_pair(key, probs.find(key)->second / sum));
    }
    return result;
}

static inline Mapf normalize(const std::vector<SLabel>& states, const Mapf& probs) {
    Mapf result;
    float sum = 0.0f;
    for (auto& prob : probs) {
        sum += prob.second;
    }
    for (auto& state : states) {
        result.insert(std::make_pair(state, probs.find(state)->second / sum));
    }
    return result;
}

static inline std::map<SLabel, Mapf> normalize(const std::vector<SLabel>& states, const std::map<SLabel, Mapf>& probs) {
    std::map<SLabel, Mapf> normalized;
    for (auto& prob : probs) {
        Label<std::string> key = prob.first;
        normalized.insert(std::make_pair(key, normalize(prob.second)));
    }
    return normalized;
}

/* implementation of hidden marcov model */
class HmmModel {
public:
    HmmModel(const std::vector<SLabel>& states,
            const std::vector<SLabel>& symbols,
            const Mapf& start_prob,
            const std::map<SLabel, Mapf>& trans_prob,
            const std::map<SLabel, Mapf>& emit_prob)
        :m_states(states),
         m_symbols(symbols)
    {
        m_start_prob = normalize(states, start_prob);
        m_trans_prob = normalize(states, trans_prob);
        m_emit_prob = normalize(states, emit_prob);
    }

public:
    /*
     * forward algorithm
     */
    float evaluate(const std::vector<SLabel>& sequence) {
        std::vector<Mapf> az = alpha(sequence);
        float score = 0.0f;
        for (auto& value : az[az.size() - 1]) {
            score += value.second;
        }
        return score;
    }

    /* viterbi algorithm */
    std::list<SLabel> decode(const std::vector<SLabel>& sequence) {
        /* first, forward step */
        Mapf prev;
        for (auto& state : m_states) {
            float prob = m_start_prob[state] * m_emit_prob[state][sequence[0]];
            prev.insert(std::make_pair(state, prob));
        }

        std::vector<std::map<SLabel, SLabel> > states;
        for (auto& SLabel : sequence) {
            Mapf current;
            std::map<Label<std::string>, Label<std::string> > pre_states;
            for (auto& state_to : m_states) {
                float max_prob = 0.0f;
                Label<std::string> max_state("None");
                for (auto& state_from : m_states) {
                    float prob = prev[state_from] * trans_prob(state_from, state_to);
                    if (max_prob < prob) {
                        max_prob = prob;
                        max_state = state_from;
                    }
                }
                float bar = max_prob * m_emit_prob[state_to][SLabel];
                current.insert(std::make_pair(state_to, bar));
                pre_states.insert(std::make_pair(state_to, max_state));
            }
            prev = current;
            states.push_back(pre_states);
        }

        /* find last state */
        float max_prob = 0.0f;
        Label<std::string> max_state("None");
        for (auto& state : m_states) {
            if (prev[state] > max_prob) {
                max_prob = prev[state];
                max_state = state;
            }
        }

        /* second, backward step */
        std::list<SLabel> result = {max_state};
        for (int i = sequence.size() - 1; i != 0; --i) {
            max_state = states[i][max_state];
            result.push_front(max_state);
        }

        return result;
    }

private:
    std::vector<Mapf> alpha(const std::vector<SLabel>& sequence) {
        std::vector<Mapf> alpha;
        Mapf first_alpha;
        for (auto& state : m_states) {
            first_alpha.insert(std::make_pair(state, start_prob(state) * emit_prob(state, sequence[0])));
        }
        alpha.push_back(first_alpha);

        for (int i = 1; i < sequence.size(); ++i) {
            Mapf trans;
            for (auto& state_to : m_states) {
                float prob = 0.0f;
                for (auto& state_from : m_states) {
                    float a = *find_map<float>(alpha[i - 1], state_from);
                    float b = trans_prob(state_from, state_to);
                    prob += a * b;
                }
                trans.insert(std::make_pair(state_to, prob * emit_prob(state_to, sequence[i])));
            }
            alpha.push_back(trans);
        }
        return alpha;
    }

public:
    template<typename T>
    const T* find_map(const std::map<SLabel, T>& map, const SLabel& label) const {
        auto find = map.find(label);
        if (find == map.end()) {
            return nullptr;
        }
        return &find->second;
    }
    float start_prob(const SLabel& label) const {
        auto prob = find_map<float>(m_start_prob, label);
        if (prob == nullptr) {
            return 0.0f;
        }
        return *prob;
    }
    float trans_prob(const SLabel& from, const SLabel& to) const {
        auto pack = find_map<Mapf>(m_trans_prob, from);
        if (pack == nullptr) {
            return 0.0f;
        }
        auto prob = find_map<float>(*pack, to);
        if (prob == nullptr) {
            return 0.0f;
        }
        return *prob;
    }
    float emit_prob(const SLabel& from, const SLabel& to) const {
        auto pack = find_map<Mapf>(m_emit_prob, from);
        if (pack == nullptr) {
            return 0.0f;
        }
        auto prob = find_map<float>(*pack, to);
        if (prob == nullptr) {
            return 0.0f;
        }
        return *prob;
    }

private:
    const std::vector<SLabel>& m_states;
    const std::vector<SLabel>& m_symbols;
    Mapf m_start_prob;
    std::map<SLabel, Mapf> m_trans_prob;
    std::map<SLabel, Mapf> m_emit_prob;
};

#endif
	#ifndef _HMM_HPP_
	#define _HMM_HPP_

	#include <vector>
	#include <string>
	#include <list>
	#include <utility>
	#include <map>

	template<typename T>
	class Label {
	public:
	Label() { }
	Label(const T& value) : m_value(value) { }
	Label(const Label<T>& rhs) : m_value(rhs.value()) { }
	bool operator=(const Label<T>& rhs) {
	m_value = rhs.m_value;
	}
	public:
	T value() const { return m_value; }
	private:
	T m_value;
	};
	template<typename T>
	static bool operator==(const Label<T>& lhs, const Label<T>& rhs) {
	return lhs.value() == rhs.value();
	}

	template<typename T>
	static bool operator<(const Label<T>& lhs, const Label<T>& rhs) {
	return lhs.value() < rhs.value();
	}

	template<typename T>
	static bool operator>(const Label<T>& lhs, const Label<T>& rhs) {
	return lhs.value() > rhs.m_value;
	}

	template<typename T>
	static bool operator!=(const Label<T>& lhs, const Label<T>& rhs) {
	return lhs.value() != rhs.value();
	}
	typedef Label<std::string> SLabel;

	typedef std::map<SLabel, float> Mapf;
	typedef std::map<SLabel, double> Mapd;

	static inline Mapf normalize(const Mapf& probs) {
	std::vector<SLabel> keys;
	float sum = 0.0f;
	for (auto& prob : probs) {
	keys.push_back(prob.first);
	sum += prob.second;
	}
	Mapf result;
	for (auto& key : keys) {
	result.insert(std::make_pair(key, probs.find(key)->second / sum));
	}
	return result;
	}

	static inline Mapf normalize(const std::vector<SLabel>& states, const Mapf& probs) {
	Mapf result;
	float sum = 0.0f;
	for (auto& prob : probs) {
	sum += prob.second;
	}
	for (auto& state : states) {
	result.insert(std::make_pair(state, probs.find(state)->second / sum));
	}
	return result;
	}

	static inline std::map<SLabel, Mapf> normalize(const std::vector<SLabel>& states, const std::map<SLabel, Mapf>& probs) {
	std::map<SLabel, Mapf> normalized;
	for (auto& prob : probs) {
	Label<std::string> key = prob.first;
	normalized.insert(std::make_pair(key, normalize(prob.second)));
	}
	return normalized;
	}

	/* implementation of hidden marcov model */
	class HmmModel {
	public:
	HmmModel(const std::vector<SLabel>& states,
	const std::vector<SLabel>& symbols,
	const Mapf& start_prob,
	const std::map<SLabel, Mapf>& trans_prob,
	const std::map<SLabel, Mapf>& emit_prob)
	:m_states(states),
	m_symbols(symbols)
	{
	m_start_prob = normalize(states, start_prob);
	m_trans_prob = normalize(states, trans_prob);
	m_emit_prob = normalize(states, emit_prob);
	}

	public:
	/*
	* forward algorithm
	*/
	float evaluate(const std::vector<SLabel>& sequence) {
	std::vector<Mapf> az = alpha(sequence);
	float score = 0.0f;
	for (auto& value : az[az.size() - 1]) {
	score += value.second;
	}
	return score;
	}

	/* viterbi algorithm */
	std::list<SLabel> decode(const std::vector<SLabel>& sequence) {
	/* first, forward step */
	Mapf prev;
	for (auto& state : m_states) {
	float prob = m_start_prob[state] * m_emit_prob[state][sequence[0]];
	prev.insert(std::make_pair(state, prob));
	}

	std::vector<std::map<SLabel, SLabel> > states;
	for (auto& SLabel : sequence) {
	Mapf current;
	std::map<Label<std::string>, Label<std::string> > pre_states;
	for (auto& state_to : m_states) {
	float max_prob = 0.0f;
	Label<std::string> max_state("None");
	for (auto& state_from : m_states) {
	float prob = prev[state_from] * trans_prob(state_from, state_to);
	if (max_prob < prob) {
	max_prob = prob;
	max_state = state_from;
	}
	}
	float bar = max_prob * m_emit_prob[state_to][SLabel];
	current.insert(std::make_pair(state_to, bar));
	pre_states.insert(std::make_pair(state_to, max_state));
	}
	prev = current;
	states.push_back(pre_states);
	}

	/* find last state */
	float max_prob = 0.0f;
	Label<std::string> max_state("None");
	for (auto& state : m_states) {
	if (prev[state] > max_prob) {
	max_prob = prev[state];
	max_state = state;
	}
	}

	/* second, backward step */
	std::list<SLabel> result = {max_state};
	for (int i = sequence.size() - 1; i != 0; --i) {
	max_state = states[i][max_state];
	result.push_front(max_state);
	}

	return result;
	}

	private:
	std::vector<Mapf> alpha(const std::vector<SLabel>& sequence) {
	std::vector<Mapf> alpha;
	Mapf first_alpha;
	for (auto& state : m_states) {
	first_alpha.insert(std::make_pair(state, start_prob(state) * emit_prob(state, sequence[0])));
	}
	alpha.push_back(first_alpha);

	for (int i = 1; i < sequence.size(); ++i) {
	Mapf trans;
	for (auto& state_to : m_states) {
	float prob = 0.0f;
	for (auto& state_from : m_states) {
	float a = *find_map<float>(alpha[i - 1], state_from);
	float b = trans_prob(state_from, state_to);
	prob += a * b;
	}
	trans.insert(std::make_pair(state_to, prob * emit_prob(state_to, sequence[i])));
	}
	alpha.push_back(trans);
	}
	return alpha;
	}

	public:
	template<typename T>
	const T* find_map(const std::map<SLabel, T>& map, const SLabel& label) const {
	auto find = map.find(label);
	if (find == map.end()) {
	return nullptr;
	}
	return &find->second;
	}
	float start_prob(const SLabel& label) const {
	auto prob = find_map<float>(m_start_prob, label);
	if (prob == nullptr) {
	return 0.0f;
	}
	return *prob;
	}
	float trans_prob(const SLabel& from, const SLabel& to) const {
	auto pack = find_map<Mapf>(m_trans_prob, from);
	if (pack == nullptr) {
	return 0.0f;
	}
	auto prob = find_map<float>(*pack, to);
	if (prob == nullptr) {
	return 0.0f;
	}
	return *prob;
	}
	float emit_prob(const SLabel& from, const SLabel& to) const {
	auto pack = find_map<Mapf>(m_emit_prob, from);
	if (pack == nullptr) {
	return 0.0f;
	}
	auto prob = find_map<float>(*pack, to);
	if (prob == nullptr) {
	return 0.0f;
	}
	return *prob;
	}

	private:
	const std::vector<SLabel>& m_states;
	const std::vector<SLabel>& m_symbols;
	Mapf m_start_prob;
	std::map<SLabel, Mapf> m_trans_prob;
	std::map<SLabel, Mapf> m_emit_prob;
	};

	#endif