Created
November 15, 2014 23:34
-
-
Save earlwlkr/e41abad644e5b286dbf3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <sstream> | |
#include <iostream> | |
#include <algorithm> | |
#include <vector> | |
#include "ClientInfo.h" | |
#include "IntervalScaledAttribute.h" | |
#include "BinaryAttribute.h" | |
#include "CategoricalAttribute.h" | |
// Running per-column minimum seen so far. ReadData() lowers an entry whenever
// it parses a smaller value; ComputeDistance() uses (max_ - min_) to normalise
// interval-scaled distances. Sixteen entries start at the sentinel 65535
// (0xFFFF). NOTE(review): the array bound comes from the class declaration,
// which is not visible here; any elements past the sixteen listed would be
// zero-initialised - confirm against the header.
double ClientInfo::min_[] = {
    65535.0, 65535.0, 65535.0, 65535.0,
    65535.0, 65535.0, 65535.0, 65535.0,
    65535.0, 65535.0, 65535.0, 65535.0,
    65535.0, 65535.0, 65535.0, 65535.0
};

// Running per-column maximum seen so far; the first entry is explicitly 0 and
// any remaining entries are zero-initialised.
double ClientInfo::max_[] = { 0.0 };
/**
 * Builds a client record whose attribute slots are all empty (null).
 *
 * @param is_test  When true the record gets one slot fewer than
 *                 NUM_ATTRIBUTES; otherwise it gets NUM_ATTRIBUTES slots.
 */
ClientInfo::ClientInfo(bool is_test)
{
    const std::size_t slot_count = NUM_ATTRIBUTES - std::size_t(is_test);

    // Size the container and null every slot in one step.
    attributes_.assign(slot_count, nullptr);
}
/// Nothing to release by hand: the destructor body is empty and members clean
/// up through their own destructors.
ClientInfo::~ClientInfo() = default;
void ClientInfo::ReadData(std::string line) | |
{ | |
std::istringstream iss(line); | |
for (int i = 0, k = attributes_.size(); i != k; i++) | |
{ | |
std::string item; | |
std::getline(iss, item, ';'); | |
if (item.length() == 0) | |
break; | |
item.erase(std::remove(item.begin(), item.end(), '"'), item.end()); | |
if (item == "age") | |
break; | |
switch (i) | |
{ | |
case 0: case 5: case 9: case 11: case 12: case 13: case 14: | |
{ | |
double val = std::stof(item); | |
if (val > max_[i]) | |
max_[i] = val; | |
else if (val < min_[i]) | |
min_[i] = val; | |
attributes_[i] = std::make_shared<IntervalScaledAttribute>(); | |
attributes_[i]->SetValue(val); | |
break; | |
} | |
case 1: case 2: case 8: case 10: | |
{ | |
attributes_[i] = std::make_shared<CategoricalAttribute>(); | |
attributes_[i]->SetValue(item); | |
break; | |
} | |
case 4: case 6: case 7: case 16: | |
{ | |
attributes_[i] = std::make_shared<BinaryAttribute>(); | |
attributes_[i]->SetValue(item); | |
break; | |
} | |
default: // Ordinal | |
{ | |
double val = 0; | |
std::vector<std::string> values; | |
if (i == 3) | |
values = { "unknown", "primary", "secondary", "tertiary" }; | |
else if (i == 15) | |
values = { "unknown", "other", "failure", "success" }; | |
for (std::size_t j = 0, l = values.size(); j != l; j++) | |
{ | |
if (item == values[j]) | |
{ | |
val = (double)(j / (l - 1)); | |
break; | |
} | |
} | |
if (val > max_[i]) | |
max_[i] = val; | |
else if (val < min_[i]) | |
min_[i] = val; | |
attributes_[i] = std::make_shared<IntervalScaledAttribute>(); | |
attributes_[i]->SetValue(val); | |
break; | |
} | |
} | |
} | |
} | |
bool ClientInfo::IsValid() const | |
{ | |
for (int i = 0, l = attributes_.size(); i != l; i++) | |
{ | |
if (attributes_[i] == nullptr) | |
return false; | |
} | |
return true; | |
} | |
double ClientInfo::ComputeDistance(const std::shared_ptr<ClientInfo> other_client) const | |
{ | |
double distance = 0.0f; | |
const int NUM_EFFECTIVE_ATTRS = NUM_ATTRIBUTES - 1; | |
for (int i = 0; i != NUM_EFFECTIVE_ATTRS; i++) | |
{ | |
double addition = attributes_[i]->ComputeDistance(other_client->attributes_[i]); | |
if (attributes_[i]->GetType() == DATA_TYPE_INTERVAL_SCALED) | |
{ | |
addition /= (max_[i] - min_[i]); | |
} | |
distance += addition; | |
} | |
distance /= NUM_EFFECTIVE_ATTRS; | |
//std::cout << distance << '\n'; | |
return distance; | |
} | |
bool ClientInfo::GetOutput() const | |
{ | |
return attributes_[NUM_ATTRIBUTES - 1]->GetValue() == "yes"; | |
} | |
std::vector<std::shared_ptr<DataAttribute>> ClientInfo::GetAttributes() const | |
{ | |
return attributes_; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment