Skip to content

Instantly share code, notes, and snippets.

@yoavlt
Created August 2, 2014 05:34
Show Gist options
  • Save yoavlt/0269c5d38b7d45c59dd3 to your computer and use it in GitHub Desktop.
Save yoavlt/0269c5d38b7d45c59dd3 to your computer and use it in GitHub Desktop.
#ifndef _HMM_HPP_
#define _HMM_HPP_
#include <vector>
#include <string>
#include <list>
#include <utility>
#include <map>
template<typename T>
class Label {
public:
Label() { }
Label(const T& value) : m_value(value) { }
Label(const Label<T>& rhs) : m_value(rhs.value()) { }
bool operator=(const Label<T>& rhs) {
m_value = rhs.m_value;
}
public:
T value() const { return m_value; }
private:
T m_value;
};
template<typename T>
static bool operator==(const Label<T>& lhs, const Label<T>& rhs) {
return lhs.value() == rhs.value();
}
template<typename T>
static bool operator<(const Label<T>& lhs, const Label<T>& rhs) {
return lhs.value() < rhs.value();
}
template<typename T>
static bool operator>(const Label<T>& lhs, const Label<T>& rhs) {
return lhs.value() > rhs.m_value;
}
template<typename T>
static bool operator!=(const Label<T>& lhs, const Label<T>& rhs) {
return lhs.value() != rhs.value();
}
typedef Label<std::string> SLabel;
typedef std::map<SLabel, float> Mapf;
typedef std::map<SLabel, double> Mapd;
static inline Mapf normalize(const Mapf& probs) {
std::vector<SLabel> keys;
float sum = 0.0f;
for (auto& prob : probs) {
keys.push_back(prob.first);
sum += prob.second;
}
Mapf result;
for (auto& key : keys) {
result.insert(std::make_pair(key, probs.find(key)->second / sum));
}
return result;
}
static inline Mapf normalize(const std::vector<SLabel>& states, const Mapf& probs) {
Mapf result;
float sum = 0.0f;
for (auto& prob : probs) {
sum += prob.second;
}
for (auto& state : states) {
result.insert(std::make_pair(state, probs.find(state)->second / sum));
}
return result;
}
static inline std::map<SLabel, Mapf> normalize(const std::vector<SLabel>& states, const std::map<SLabel, Mapf>& probs) {
std::map<SLabel, Mapf> normalized;
for (auto& prob : probs) {
Label<std::string> key = prob.first;
normalized.insert(std::make_pair(key, normalize(prob.second)));
}
return normalized;
}
/* implementation of hidden marcov model */
class HmmModel {
public:
HmmModel(const std::vector<SLabel>& states,
const std::vector<SLabel>& symbols,
const Mapf& start_prob,
const std::map<SLabel, Mapf>& trans_prob,
const std::map<SLabel, Mapf>& emit_prob)
:m_states(states),
m_symbols(symbols)
{
m_start_prob = normalize(states, start_prob);
m_trans_prob = normalize(states, trans_prob);
m_emit_prob = normalize(states, emit_prob);
}
public:
/*
* forward algorithm
*/
float evaluate(const std::vector<SLabel>& sequence) {
std::vector<Mapf> az = alpha(sequence);
float score = 0.0f;
for (auto& value : az[az.size() - 1]) {
score += value.second;
}
return score;
}
/* viterbi algorithm */
std::list<SLabel> decode(const std::vector<SLabel>& sequence) {
/* first, forward step */
Mapf prev;
for (auto& state : m_states) {
float prob = m_start_prob[state] * m_emit_prob[state][sequence[0]];
prev.insert(std::make_pair(state, prob));
}
std::vector<std::map<SLabel, SLabel> > states;
for (auto& SLabel : sequence) {
Mapf current;
std::map<Label<std::string>, Label<std::string> > pre_states;
for (auto& state_to : m_states) {
float max_prob = 0.0f;
Label<std::string> max_state("None");
for (auto& state_from : m_states) {
float prob = prev[state_from] * trans_prob(state_from, state_to);
if (max_prob < prob) {
max_prob = prob;
max_state = state_from;
}
}
float bar = max_prob * m_emit_prob[state_to][SLabel];
current.insert(std::make_pair(state_to, bar));
pre_states.insert(std::make_pair(state_to, max_state));
}
prev = current;
states.push_back(pre_states);
}
/* find last state */
float max_prob = 0.0f;
Label<std::string> max_state("None");
for (auto& state : m_states) {
if (prev[state] > max_prob) {
max_prob = prev[state];
max_state = state;
}
}
/* second, backward step */
std::list<SLabel> result = {max_state};
for (int i = sequence.size() - 1; i != 0; --i) {
max_state = states[i][max_state];
result.push_front(max_state);
}
return result;
}
private:
std::vector<Mapf> alpha(const std::vector<SLabel>& sequence) {
std::vector<Mapf> alpha;
Mapf first_alpha;
for (auto& state : m_states) {
first_alpha.insert(std::make_pair(state, start_prob(state) * emit_prob(state, sequence[0])));
}
alpha.push_back(first_alpha);
for (int i = 1; i < sequence.size(); ++i) {
Mapf trans;
for (auto& state_to : m_states) {
float prob = 0.0f;
for (auto& state_from : m_states) {
float a = *find_map<float>(alpha[i - 1], state_from);
float b = trans_prob(state_from, state_to);
prob += a * b;
}
trans.insert(std::make_pair(state_to, prob * emit_prob(state_to, sequence[i])));
}
alpha.push_back(trans);
}
return alpha;
}
public:
template<typename T>
const T* find_map(const std::map<SLabel, T>& map, const SLabel& label) const {
auto find = map.find(label);
if (find == map.end()) {
return nullptr;
}
return &find->second;
}
float start_prob(const SLabel& label) const {
auto prob = find_map<float>(m_start_prob, label);
if (prob == nullptr) {
return 0.0f;
}
return *prob;
}
float trans_prob(const SLabel& from, const SLabel& to) const {
auto pack = find_map<Mapf>(m_trans_prob, from);
if (pack == nullptr) {
return 0.0f;
}
auto prob = find_map<float>(*pack, to);
if (prob == nullptr) {
return 0.0f;
}
return *prob;
}
float emit_prob(const SLabel& from, const SLabel& to) const {
auto pack = find_map<Mapf>(m_emit_prob, from);
if (pack == nullptr) {
return 0.0f;
}
auto prob = find_map<float>(*pack, to);
if (prob == nullptr) {
return 0.0f;
}
return *prob;
}
private:
const std::vector<SLabel>& m_states;
const std::vector<SLabel>& m_symbols;
Mapf m_start_prob;
std::map<SLabel, Mapf> m_trans_prob;
std::map<SLabel, Mapf> m_emit_prob;
};
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment