Created
August 2, 2014 05:34
-
-
Save yoavlt/0269c5d38b7d45c59dd3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef _HMM_HPP_ | |
#define _HMM_HPP_ | |
#include <vector> | |
#include <string> | |
#include <list> | |
#include <utility> | |
#include <map> | |
template<typename T> | |
class Label { | |
public: | |
Label() { } | |
Label(const T& value) : m_value(value) { } | |
Label(const Label<T>& rhs) : m_value(rhs.value()) { } | |
bool operator=(const Label<T>& rhs) { | |
m_value = rhs.m_value; | |
} | |
public: | |
T value() const { return m_value; } | |
private: | |
T m_value; | |
}; | |
template<typename T> | |
static bool operator==(const Label<T>& lhs, const Label<T>& rhs) { | |
return lhs.value() == rhs.value(); | |
} | |
template<typename T> | |
static bool operator<(const Label<T>& lhs, const Label<T>& rhs) { | |
return lhs.value() < rhs.value(); | |
} | |
template<typename T> | |
static bool operator>(const Label<T>& lhs, const Label<T>& rhs) { | |
return lhs.value() > rhs.m_value; | |
} | |
template<typename T> | |
static bool operator!=(const Label<T>& lhs, const Label<T>& rhs) { | |
return lhs.value() != rhs.value(); | |
} | |
typedef Label<std::string> SLabel; | |
typedef std::map<SLabel, float> Mapf; | |
typedef std::map<SLabel, double> Mapd; | |
static inline Mapf normalize(const Mapf& probs) { | |
std::vector<SLabel> keys; | |
float sum = 0.0f; | |
for (auto& prob : probs) { | |
keys.push_back(prob.first); | |
sum += prob.second; | |
} | |
Mapf result; | |
for (auto& key : keys) { | |
result.insert(std::make_pair(key, probs.find(key)->second / sum)); | |
} | |
return result; | |
} | |
static inline Mapf normalize(const std::vector<SLabel>& states, const Mapf& probs) { | |
Mapf result; | |
float sum = 0.0f; | |
for (auto& prob : probs) { | |
sum += prob.second; | |
} | |
for (auto& state : states) { | |
result.insert(std::make_pair(state, probs.find(state)->second / sum)); | |
} | |
return result; | |
} | |
static inline std::map<SLabel, Mapf> normalize(const std::vector<SLabel>& states, const std::map<SLabel, Mapf>& probs) { | |
std::map<SLabel, Mapf> normalized; | |
for (auto& prob : probs) { | |
Label<std::string> key = prob.first; | |
normalized.insert(std::make_pair(key, normalize(prob.second))); | |
} | |
return normalized; | |
} | |
/* implementation of hidden marcov model */ | |
class HmmModel { | |
public: | |
HmmModel(const std::vector<SLabel>& states, | |
const std::vector<SLabel>& symbols, | |
const Mapf& start_prob, | |
const std::map<SLabel, Mapf>& trans_prob, | |
const std::map<SLabel, Mapf>& emit_prob) | |
:m_states(states), | |
m_symbols(symbols) | |
{ | |
m_start_prob = normalize(states, start_prob); | |
m_trans_prob = normalize(states, trans_prob); | |
m_emit_prob = normalize(states, emit_prob); | |
} | |
public: | |
/* | |
* forward algorithm | |
*/ | |
float evaluate(const std::vector<SLabel>& sequence) { | |
std::vector<Mapf> az = alpha(sequence); | |
float score = 0.0f; | |
for (auto& value : az[az.size() - 1]) { | |
score += value.second; | |
} | |
return score; | |
} | |
/* viterbi algorithm */ | |
std::list<SLabel> decode(const std::vector<SLabel>& sequence) { | |
/* first, forward step */ | |
Mapf prev; | |
for (auto& state : m_states) { | |
float prob = m_start_prob[state] * m_emit_prob[state][sequence[0]]; | |
prev.insert(std::make_pair(state, prob)); | |
} | |
std::vector<std::map<SLabel, SLabel> > states; | |
for (auto& SLabel : sequence) { | |
Mapf current; | |
std::map<Label<std::string>, Label<std::string> > pre_states; | |
for (auto& state_to : m_states) { | |
float max_prob = 0.0f; | |
Label<std::string> max_state("None"); | |
for (auto& state_from : m_states) { | |
float prob = prev[state_from] * trans_prob(state_from, state_to); | |
if (max_prob < prob) { | |
max_prob = prob; | |
max_state = state_from; | |
} | |
} | |
float bar = max_prob * m_emit_prob[state_to][SLabel]; | |
current.insert(std::make_pair(state_to, bar)); | |
pre_states.insert(std::make_pair(state_to, max_state)); | |
} | |
prev = current; | |
states.push_back(pre_states); | |
} | |
/* find last state */ | |
float max_prob = 0.0f; | |
Label<std::string> max_state("None"); | |
for (auto& state : m_states) { | |
if (prev[state] > max_prob) { | |
max_prob = prev[state]; | |
max_state = state; | |
} | |
} | |
/* second, backward step */ | |
std::list<SLabel> result = {max_state}; | |
for (int i = sequence.size() - 1; i != 0; --i) { | |
max_state = states[i][max_state]; | |
result.push_front(max_state); | |
} | |
return result; | |
} | |
private: | |
std::vector<Mapf> alpha(const std::vector<SLabel>& sequence) { | |
std::vector<Mapf> alpha; | |
Mapf first_alpha; | |
for (auto& state : m_states) { | |
first_alpha.insert(std::make_pair(state, start_prob(state) * emit_prob(state, sequence[0]))); | |
} | |
alpha.push_back(first_alpha); | |
for (int i = 1; i < sequence.size(); ++i) { | |
Mapf trans; | |
for (auto& state_to : m_states) { | |
float prob = 0.0f; | |
for (auto& state_from : m_states) { | |
float a = *find_map<float>(alpha[i - 1], state_from); | |
float b = trans_prob(state_from, state_to); | |
prob += a * b; | |
} | |
trans.insert(std::make_pair(state_to, prob * emit_prob(state_to, sequence[i]))); | |
} | |
alpha.push_back(trans); | |
} | |
return alpha; | |
} | |
public: | |
template<typename T> | |
const T* find_map(const std::map<SLabel, T>& map, const SLabel& label) const { | |
auto find = map.find(label); | |
if (find == map.end()) { | |
return nullptr; | |
} | |
return &find->second; | |
} | |
float start_prob(const SLabel& label) const { | |
auto prob = find_map<float>(m_start_prob, label); | |
if (prob == nullptr) { | |
return 0.0f; | |
} | |
return *prob; | |
} | |
float trans_prob(const SLabel& from, const SLabel& to) const { | |
auto pack = find_map<Mapf>(m_trans_prob, from); | |
if (pack == nullptr) { | |
return 0.0f; | |
} | |
auto prob = find_map<float>(*pack, to); | |
if (prob == nullptr) { | |
return 0.0f; | |
} | |
return *prob; | |
} | |
float emit_prob(const SLabel& from, const SLabel& to) const { | |
auto pack = find_map<Mapf>(m_emit_prob, from); | |
if (pack == nullptr) { | |
return 0.0f; | |
} | |
auto prob = find_map<float>(*pack, to); | |
if (prob == nullptr) { | |
return 0.0f; | |
} | |
return *prob; | |
} | |
private: | |
const std::vector<SLabel>& m_states; | |
const std::vector<SLabel>& m_symbols; | |
Mapf m_start_prob; | |
std::map<SLabel, Mapf> m_trans_prob; | |
std::map<SLabel, Mapf> m_emit_prob; | |
}; | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment