Last active
December 17, 2015 07:19
-
-
Save renatooliveira/5571894 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * RUExtractor.cpp
 *
 *  Created on: May 6, 2013
 *      Author: renato
 */
#include "RUExtractor.hpp"

#include <math.h>

#include <algorithm>
#include <cmath>
#include <iostream>
#include <utility>
#include <vector>
/*
 * db(nome, obj) - debug trace helper.
 *
 * When DEBUG evaluates to non-zero, writes `nome` followed by `obj` to
 * std::cout and ends the line. Otherwise it expands to an empty statement and
 * neither argument is evaluated.
 *
 * Both variants are wrapped in do { } while (0) WITHOUT a trailing semicolon,
 * so `db(a, b);` is exactly one statement and is safe inside an unbraced
 * if/else. The arguments are parenthesised so expressions can be passed.
 */
#if DEBUG
#define db(nome, obj) do { std::cout << (nome) << (obj) << std::endl; } while (0)
#else
#define db(nome, obj) do { } while (0)
#endif
/**
 * Returns the given cluster count clamped to a minimum of one.
 *
 * @param module number of elements (e.g. the size of a tuple list)
 * @return `module`, or 1 when `module` is 0
 */
static unsigned int module_size(unsigned module)
{
    // Qualified call: does not depend on a using-directive leaking in from a header.
    return std::max<unsigned int>(module, 1u);
}
static double alertAmount(const vector<CTuple>& tuples) | |
{ | |
double amount = 0.0; | |
for (auto it = tuples.begin(); it != tuples.end(); ++it) | |
amount += *it.alertCount; | |
return amount; | |
} | |
/** | |
* Calculates the entropy of the CTables | |
* | |
* entropy = SUM p(a) * log_2 p(a) | |
*/ | |
statuc double entropy(const vector<CTuple>& tuples) { | |
double alertTotal = alertAmount(tuples); | |
double entropy = 0.0; // ? | |
for (auto it = tuples.begin(); it != tuples.end(); ++it){ | |
double p = *it.alertCount / alertTotal; | |
double log_p = (log (p)) / log (2); | |
entropy += (p * log_p); | |
} | |
return -entropy; | |
} | |
/**
 * Relative uncertainty: the entropy normalised by the largest entropy that is
 * possible for the given support size.
 *
 *   RU = entropy / log_2( min(module, alertAmount) )
 *
 * @param entropy     entropy of the distribution, in bits
 * @param module      number of distinct elements in the distribution
 * @param alertAmount total number of observations
 * @return entropy divided by the maximum entropy. When min(module, alertAmount)
 *         is not greater than 1 the maximum entropy is zero (at most one
 *         outcome is possible), and 0.0 is returned instead of dividing by
 *         zero or by log2 of a value below 1.
 */
static double RU(double entropy, double module, double alertAmount)
{
    const double support = std::min(module, alertAmount);
    if (!(support > 1.0))
        return 0.0;
    return entropy / std::log2(support);
}
pair<vector<CTuple>, vector<CTuple>> RUExtractor::extractingSignificantClusters(const CTable& table) { | |
double alpha = 0.02; | |
const double beta = 0.9; | |
const int k = 1; | |
vector<CTuple> tuples = table.clusterTable; | |
vector<CTuple> S; | |
double entropy = getEntropy(tuples); | |
double module = module_size(tuples.size()); | |
double alertAmount = alertAmount(tuples); | |
double RU = getRU(entropy, module, alertAmount); | |
while(RU < beta) { | |
db ("RU ", RU); | |
alpha *= pow(2, -k); | |
for (auto it = tuples.begin(); it != tuples.end(); ++it) { | |
CTuple tuple =*it; | |
double p = tuple.alertCount / alertAmount; | |
if (p < alpha) { | |
S.push_back(tuple); | |
tuples.erase(tuple); | |
} | |
} | |
module = module_size(tuples.size()); | |
entropy = entropy(tuples); | |
alertAmount = alertAmount(cTuples); | |
RU = getRU(entropy, module, alertAmount); | |
} | |
pair<vector<CTuple>, vector<CTuple>> ret; | |
ret.first = S; | |
ret.second = tuples; | |
return ret; | |
} | |
/** Default constructor: performs no work beyond default member initialisation. */
RUExtractor::RUExtractor() = default;
/** Destructor: has no body of its own; members are destroyed as usual. */
RUExtractor::~RUExtractor() = default;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment