Skip to content

Instantly share code, notes, and snippets.

@earlwlkr
Created November 15, 2014 23:34
Show Gist options
  • Save earlwlkr/e41abad644e5b286dbf3 to your computer and use it in GitHub Desktop.
Save earlwlkr/e41abad644e5b286dbf3 to your computer and use it in GitHub Desktop.
#include <sstream>
#include <iostream>
#include <algorithm>
#include <vector>
#include "ClientInfo.h"
#include "IntervalScaledAttribute.h"
#include "BinaryAttribute.h"
#include "CategoricalAttribute.h"
// Running per-column minimum shared by every ClientInfo instance; ReadData()
// lowers an entry whenever it parses a smaller value for that column.
// Sixteen initializers are listed, each starting at 0xFFFF (65535).
double ClientInfo::min_[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF ,
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF };
// Running per-column maximum shared by every ClientInfo instance; ReadData()
// raises an entry whenever it parses a larger value for that column.
// Only the first element is given explicitly (0).
// NOTE(review): if the declaration in ClientInfo.h does not specify an array
// bound, this definition deduces a ONE-element array, while ReadData() and
// ComputeDistance() index it by column position (up to 15) — confirm the
// header fixes the size.
double ClientInfo::max_[] = { 0 };
// Builds a client with every attribute slot empty (null).
// When `is_test` is true one slot fewer than NUM_ATTRIBUTES is allocated.
ClientInfo::ClientInfo(bool is_test)
{
    const std::size_t slot_count = NUM_ATTRIBUTES - std::size_t(is_test);
    attributes_.resize(slot_count);

    // Explicitly mark every slot as "not read yet".
    for (auto& slot : attributes_)
    {
        slot = nullptr;
    }
}
// Nothing to release by hand: the attribute slots are shared_ptr-managed.
ClientInfo::~ClientInfo() = default;
// Parses one ';'-separated record and fills this client's attribute slots.
//
// Each field has its double quotes stripped and is then stored according to
// its column position:
//   - columns 0, 5, 9, 11, 12, 13, 14: numeric, stored as IntervalScaledAttribute;
//   - columns 1, 2, 8, 10:             stored as CategoricalAttribute;
//   - columns 4, 6, 7, 16:             stored as BinaryAttribute;
//   - remaining columns (3 and 15):    ordinal labels mapped onto [0, 1] and
//                                      stored as IntervalScaledAttribute.
// Numeric and ordinal values also update the class-wide min_/max_ tables.
//
// Parsing stops early, leaving the remaining slots untouched, when a field is
// empty or when a field equals "age" (which is how a header row is skipped).
//
// @param line  The raw record text.
// @throws std::invalid_argument / std::out_of_range  propagated from
//         std::stod when a numeric column does not hold a parsable number.
void ClientInfo::ReadData(std::string line)
{
    // Widens the shared [min_, max_] range of `column` so it contains `value`.
    // The two bounds are tested independently: with an `else if`, a value that
    // raised the maximum could never lower the minimum, so an increasing run
    // of values would leave the minimum stuck at its initial sentinel.
    const auto track_range = [](std::size_t column, double value)
    {
        if (value > max_[column])
            max_[column] = value;
        if (value < min_[column])
            min_[column] = value;
    };

    std::istringstream iss(line);
    for (std::size_t i = 0, count = attributes_.size(); i != count; ++i)
    {
        std::string item;
        std::getline(iss, item, ';');
        if (item.empty())
            break;

        // Fields may be wrapped in double quotes; drop every quote character.
        item.erase(std::remove(item.begin(), item.end(), '"'), item.end());
        if (item == "age")
            break;

        switch (i)
        {
        case 0: case 5: case 9: case 11: case 12: case 13: case 14:
        {
            // std::stod keeps full double precision (std::stof would round
            // the value to float before it is stored in a double).
            const double val = std::stod(item);
            track_range(i, val);
            attributes_[i] = std::make_shared<IntervalScaledAttribute>();
            attributes_[i]->SetValue(val);
            break;
        }
        case 1: case 2: case 8: case 10:
        {
            attributes_[i] = std::make_shared<CategoricalAttribute>();
            attributes_[i]->SetValue(item);
            break;
        }
        case 4: case 6: case 7: case 16:
        {
            attributes_[i] = std::make_shared<BinaryAttribute>();
            attributes_[i]->SetValue(item);
            break;
        }
        default: // Ordinal
        {
            std::vector<std::string> levels;
            if (i == 3)
                levels = { "unknown", "primary", "secondary", "tertiary" };
            else if (i == 15)
                levels = { "unknown", "other", "failure", "success" };

            // Map the label's rank onto [0, 1]; unrecognised labels stay at 0.
            // The division must be done in floating point: dividing the
            // integer rank by (size - 1) first would truncate every level
            // except the last one down to 0.
            double val = 0;
            const auto match = std::find(levels.begin(), levels.end(), item);
            if (match != levels.end() && levels.size() > 1)
            {
                val = static_cast<double>(match - levels.begin())
                    / static_cast<double>(levels.size() - 1);
            }

            track_range(i, val);
            attributes_[i] = std::make_shared<IntervalScaledAttribute>();
            attributes_[i]->SetValue(val);
            break;
        }
        }
    }
}
bool ClientInfo::IsValid() const
{
for (int i = 0, l = attributes_.size(); i != l; i++)
{
if (attributes_[i] == nullptr)
return false;
}
return true;
}
// Computes the average per-attribute distance between this client and
// `other_client` over the first NUM_ATTRIBUTES - 1 attributes (the last
// attribute is left out).
//
// Each attribute contributes whatever its own ComputeDistance() returns;
// interval-scaled contributions are additionally divided by the observed
// range (max_ - min_) of their column.
//
// Neither client's slots are checked here: both must already hold non-null
// attributes in every slot that is visited.
//
// @param other_client  The client to compare against.
// @return The summed contributions divided by NUM_ATTRIBUTES - 1.
double ClientInfo::ComputeDistance(const std::shared_ptr<ClientInfo> other_client) const
{
    const int NUM_EFFECTIVE_ATTRS = NUM_ATTRIBUTES - 1;

    double distance = 0.0;
    for (int i = 0; i != NUM_EFFECTIVE_ATTRS; i++)
    {
        double addition = attributes_[i]->ComputeDistance(other_client->attributes_[i]);
        if (attributes_[i]->GetType() == DATA_TYPE_INTERVAL_SCALED)
        {
            // Normalise only when the column has a usable range. Dividing by
            // a zero range (all observed values identical) would turn the
            // whole result into NaN or infinity.
            const double range = max_[i] - min_[i];
            if (range > 0.0)
            {
                addition /= range;
            }
        }
        distance += addition;
    }

    return distance / NUM_EFFECTIVE_ATTRS;
}
// Returns true when the last attribute (index NUM_ATTRIBUTES - 1) holds the
// value "yes".
//
// Clients constructed with is_test == true have one slot fewer and therefore
// no such attribute, and a slot may still be null if it was never read; in
// both cases this returns false instead of reading out of bounds or through
// a null pointer.
bool ClientInfo::GetOutput() const
{
    const std::size_t output_index = static_cast<std::size_t>(NUM_ATTRIBUTES) - 1;
    if (output_index >= attributes_.size() || attributes_[output_index] == nullptr)
    {
        return false;
    }
    return attributes_[output_index]->GetValue() == "yes";
}
std::vector<std::shared_ptr<DataAttribute>> ClientInfo::GetAttributes() const
{
return attributes_;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment