Skip to content

Instantly share code, notes, and snippets.

@lp6m
Created October 6, 2019 08:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lp6m/29449f7690a76d331c41ab9248b183a9 to your computer and use it in GitHub Desktop.
Save lp6m/29449f7690a76d331c41ab9248b183a9 to your computer and use it in GitHub Desktop.
bdeu_score.cpp
/*example dataset
4 3
A B C D
A B
B C
D C
10
1 1 0 1
0 2 1 1
2 1 0 0
1 1 0 2
1 1 0 1
0 0 1 2
0 2 1 1
1 0 1 1
1 1 2 1
1 2 0 1
*/
#include <bits/stdc++.h>
using namespace std;
#define FOR(i,a,b) for(int i=(int)(a);i<(int)(b);i++)
#define REP(i,n) for(int i=0;i<(int)(n);i++)
void showVI(vector<int> data){
cout << "[";
REP(i, data.size()){
cout << data[i];
if(i != data.size() - 1) cout << ", ";
}
cout << "]";
}
void showVVI(vector<vector<int>> data){
cout << "[";
REP(i, data.size()){
cout << "[";
REP(j, data[i].size()){
cout << data[i][j];
if(j != data[i].size() - 1) cout << ", ";
}
cout << "]";
if(i != data.size() - 1) cout << ", ";
}
cout << "]" << endl;
}
class Node{
public:
string name;
vector<Node> parents;
Node();
Node(string);
void add_parents(Node);
};
Node::Node(){}
Node::Node(string m_name): name(m_name){
}
void Node::add_parents(Node node){
this->parents.push_back(node);
}
class BayesianNetwork{
public:
vector<string> column_names;
BayesianNetwork();
BayesianNetwork(vector<pair<string, string>>);
void add_edge(string, string);
void set_graph(vector<pair<string, string>> arrowList);
void show_nodes();
vector<string> get_node_names();
vector<Node> get_nodes();
vector<Node> get_parent_nodes(string);
vector<Node> get_parent_nodes(Node);
void save_as_dotscript(string);
private:
map<string, Node> node_dic;
void check_graph_structure();
string to_dot_script();
};
BayesianNetwork::BayesianNetwork(){
}
BayesianNetwork::BayesianNetwork(vector<pair<string, string>> arrowList){
set_graph(arrowList);
}
void BayesianNetwork::set_graph(vector<pair<string, string>> edgeList){
for(auto edge :edgeList){
this->add_edge(edge.first, edge.second);
}
}
void BayesianNetwork::add_edge(string from_name, string to_name){
if(this->node_dic.count(from_name) == 0) this->node_dic[from_name] = Node(from_name);
if(this->node_dic.count(to_name) == 0) this->node_dic[to_name] = Node(to_name);
this->node_dic[to_name].add_parents(this->node_dic[from_name]);
}
void BayesianNetwork::check_graph_structure(){
//TODO
}
void BayesianNetwork::show_nodes(){
for(auto itr = this->node_dic.begin(); itr != this->node_dic.end(); itr++){
cout << itr->first << ", ";
}
cout << endl;
}
vector<string> BayesianNetwork::get_node_names(){
vector<string> names;
for(auto itr = this->node_dic.begin(); itr != this->node_dic.end(); itr++){
names.push_back(itr->first);
}
return names;
}
vector<Node> BayesianNetwork::get_nodes(){
vector<Node> nodes;
for(auto itr = this->node_dic.begin(); itr != this->node_dic.end(); itr++){
nodes.push_back(itr->second);
}
return nodes;
}
vector<Node> BayesianNetwork::get_parent_nodes(Node child_node){
return child_node.parents;
}
vector<Node> BayesianNetwork::get_parent_nodes(string child_name){
if(this->node_dic.count(child_name) == 0) throw "Exception: network does not have specified child node: " + child_name;
Node node = this->node_dic[child_name];
return BayesianNetwork::get_parent_nodes(node);
}
string BayesianNetwork::to_dot_script(){
//convert network structure to DOT script for graphviz
string s = "digraph{\n";
for(auto itr = this->node_dic.begin(); itr != this->node_dic.end(); itr++){
Node node = itr->second;
for(Node parent: node.parents){
s += " " + parent.name + " -> " + node.name + "\n";
}
}
s += "}\n";
return s;
}
void BayesianNetwork::save_as_dotscript(string filename){
ofstream outputfile(filename);
outputfile << this->to_dot_script();
outputfile.close();
}
class Column{
public:
Column(string);
string name;
int get_kind_num();
void update_kind_num(int);
vector<int> get_kinds();
private:
set<int> value_kind_set; //number of kind of data
};
Column::Column(string m_name): name(m_name){
}
vector<int> Column::get_kinds(){
return vector<int>(this->value_kind_set.begin(), this->value_kind_set.end());
}
int Column::get_kind_num(){
return this->value_kind_set.size();
}
void Column::update_kind_num(int val){
this->value_kind_set.insert(val);
}
class DataSet{
public:
vector<vector<int>> data;
void set_data(vector<string>, vector<vector<int>>);
void set_data_fromfile(string);
int get_column_index_from_name(string);
vector<Column> columns;
private:
void add_data(vector<int>);
void set_column(vector<string>);
map<string, int> column_index_of_name;
};
void DataSet::set_column(vector<string> columns){
int i = 0;
for(string name: columns){
this->columns.push_back(Column(name));
this->column_index_of_name[name] = i++;
}
}
int DataSet::get_column_index_from_name(string name){
return this->column_index_of_name[name];
}
void DataSet::add_data(vector<int> oneline){
data.push_back(oneline);
REP(i, oneline.size()){
int val = oneline[i];
this->columns[i].update_kind_num(val);
}
}
void DataSet::set_data(vector<string> column_names, vector<vector<int>> data){
this->set_column(column_names);
for(vector<int> oneline : data){
this->add_data(oneline);
}
}
void DataSet::set_data_fromfile(string filename){
//TODO
}
class BdeuScore{
public:
BdeuScore(BayesianNetwork, DataSet);
double get_score();
private:
BayesianNetwork model;
DataSet dataset;
double get_local_score(Node variable, vector<Node> parents);
struct StateCount{
int parents_candidates_num;
int child_candidates_num;
map<vector<int>, pair<int, map<int, int>>> countmap;
StateCount(int pnum, int cnum, map<vector<int>, pair<int, map<int, int>>> countmap){
this->parents_candidates_num = pnum;
this->child_candidates_num = cnum;
this->countmap = countmap;
}
};
BdeuScore::StateCount get_state_counts(Node variable, vector<Node> parents);
};
BdeuScore::BdeuScore(BayesianNetwork _model, DataSet _dataset): model(_model), dataset(_dataset){
}
double BdeuScore::get_score(){
double score = 0;
auto nodes = this->model.get_nodes();
for(Node node: nodes){
score += this->get_local_score(node, node.parents);
}
return score;
}
double BdeuScore::get_local_score(Node variable, vector<Node> parents){
const double ess = 10.0;//equivalent_sample_size default value refers to pgmpy implementation
double local_score = 0;
StateCount statecount = this->get_state_counts(variable, parents);
double nik_prime = (double)ess/(double)statecount.parents_candidates_num;
double nijk_prime = nik_prime/(double)statecount.child_candidates_num;
//does not iterate all combinations. only iterate in existing combinations
for(auto pitr = statecount.countmap.begin(); pitr != statecount.countmap.end(); pitr++){
double nik = (double)(pitr->second.first);
auto childmap = pitr->second.second;
if(nik > 0) local_score += lgamma(nik_prime) - lgamma(nik + nik_prime);
for(auto citr = childmap.begin(); citr != childmap.end(); citr++){
double nijk = (double)(citr->second);
if(nijk > 0) local_score += lgamma(nijk_prime + nijk) - lgamma(nijk_prime);
}
}
return local_score;
}
BdeuScore::StateCount BdeuScore::get_state_counts(Node variable, vector<Node> parents){
DataSet dataset = this->dataset;
if(parents.size() > 0){
int parents_candidates_num = 1;
vector<int> parent_indices;
for(Node parent_node: parents){
string parent_name = parent_node.name;
int column_index = dataset.get_column_index_from_name(parent_name);
parent_indices.push_back(column_index);
parents_candidates_num *= dataset.columns[column_index].get_kind_num();
}
int child_column_index = dataset.get_column_index_from_name(variable.name);
int child_candidates_num = dataset.columns[child_column_index].get_kind_num();
map<vector<int>, pair<int, map<int, int>>> family_states_countmap;
for(vector<int> oneline: dataset.data){
vector<int> ar;
for(int index: parent_indices){
ar.push_back(oneline[index]);
}
if(family_states_countmap.count(ar) == 0){
map<int,int> m;
family_states_countmap[ar] = make_pair(0, m);
}
family_states_countmap[ar].first++;
int child_val = oneline[child_column_index];
if(family_states_countmap[ar].second.count(child_val) == 0){
family_states_countmap[ar].second[child_val] = 0;
}
family_states_countmap[ar].second[child_val]++;
}
return BdeuScore::StateCount(parents_candidates_num, child_candidates_num, family_states_countmap);
}else{
//leaf = no parent
int child_column_index = dataset.get_column_index_from_name(variable.name);
int child_candidates_num = dataset.columns[child_column_index].get_kind_num();
map<int,int> child_countmap;
for(vector<int> oneline: dataset.data){
int val = oneline[child_column_index];
if(child_countmap.count(val) == 0) child_countmap[val] = 0;
child_countmap[val]++;
}
vector<int> dummy_parent = vector<int>(1);
map<vector<int>, pair<int, map<int, int>>> states_countmap;
states_countmap[dummy_parent] = make_pair(dataset.data.size(), child_countmap);
return BdeuScore::StateCount(1, child_candidates_num, states_countmap);
}
}
int main(){
BayesianNetwork bn;
vector<string> node_ames;
int node_num, edge_num;
cin >> node_num >> edge_num;
vector<string> column_names;
REP(i, node_num){
string column_name;
cin >> column_name;
column_names.push_back(column_name);
}
REP(i, edge_num){
string from, to;
cin >> from >> to;
bn.add_edge(from, to);
}
bn.show_nodes();
bn.save_as_dotscript("graph.dot");
int data_num;
cin >> data_num;
vector<vector<int>> dataarr;
REP(i, data_num){
vector<int> oneline;
REP(j, column_names.size()){
int val;
cin >> val;
oneline.push_back(val);
}
dataarr.push_back(oneline);
}
DataSet dataset;
dataset.set_data(column_names, dataarr);
REP(i, column_names.size()){
cout << dataset.columns[i].name << " : ";
cout << dataset.columns[i].get_kind_num() << endl;
}
BdeuScore bdeuscore(bn, dataset);
double score = bdeuscore.get_score();
cout << score << endl;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment