Last active
August 29, 2015 14:23
-
-
Save atavory/97d94485e48e80b54123 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
#include <functional> | |
#include <algorithm> | |
#include <random> | |
#include <chrono> | |
#include <cassert> | |
#include <functional> | |
const std::size_t outer_tries = 3; | |
const size_t max_size = 50000; | |
const size_t num_tries = 3; | |
template<typename T, class Hash=std::hash<T>> | |
class smallest_dup_remover | |
{ | |
public: | |
explicit smallest_dup_remover(std::size_t max_size) | |
{ | |
while(m_mask < max_size) | |
m_mask *= 2; | |
m_status.resize(m_mask); | |
m_vals.resize(m_mask); | |
--m_mask; | |
} | |
void operator()(std::vector<T> &vals) | |
{ | |
std::fill(std::begin(m_status), std::end(m_status), 0); | |
bool has = false; | |
T min_; | |
std::vector<T> spillover; | |
spillover.reserve(vals.size()); | |
for(auto v: vals) | |
{ | |
const std::size_t pos = m_hash(v) & m_mask; | |
char &status = m_status[pos]; | |
switch(status) | |
{ | |
case 0: | |
status = 1; | |
m_vals[pos] = v; | |
break; | |
case 1: | |
if(m_vals[pos] == v) | |
{ | |
status = 2; | |
min_ = has? std::min(min_, v): v; | |
has = true; | |
} | |
else | |
spillover.push_back(v); | |
break; | |
case 2: | |
if(m_vals[pos] != v) | |
spillover.push_back(v); | |
} | |
} | |
std::sort(std::begin(spillover), std::end(spillover)); | |
auto it = std::adjacent_find(std::begin(spillover), std::end(spillover)); | |
if(has && it == std::end(spillover)) | |
remove_min(vals, min_); | |
else if(has && it != std::end(spillover)) | |
remove_min(vals, std::min(min_, *it)); | |
else if(!has && it != std::end(spillover)) | |
remove_min(vals, *it); | |
} | |
private: | |
void remove_min(std::vector<T> &vals, T t) | |
{ | |
vals.erase(std::find(vals.begin(), vals.end(), t)); | |
} | |
private: | |
size_t m_mask = 1; | |
std::vector<char> m_status; | |
std::vector<T> m_vals; | |
Hash m_hash; | |
}; | |
template <typename Container> | |
void | |
eraseSmallestNonunique(Container& c) | |
{ | |
std::sort(std::begin(c), std::end(c)); | |
auto it = std::adjacent_find(std::begin(c), std::end(c)); | |
if (it != std::end(c)) | |
c.erase(it); | |
} | |
template <typename Container> | |
void | |
removeSmallestNonunique(Container& c) | |
{ | |
using value_type = typename Container::value_type; | |
if (c.size() > 1) | |
{ | |
std::make_heap(c.begin(), c.end(), std::greater<value_type>{}); | |
std::pop_heap(c.begin(), c.end(), std::greater<value_type>{}); | |
for (auto e = std::prev(c.end()); e != c.begin(); --e) | |
{ | |
std::pop_heap(c.begin(), e, std::greater<value_type>{}); | |
if (*e == e[-1]) | |
{ | |
c.erase(e); | |
break; | |
} | |
} | |
} | |
} | |
std::mt19937 generator; | |
std::vector<double> make_orig_completely_random(std::size_t size) | |
{ | |
std::uniform_real_distribution<double> distribution; | |
std::vector<double> orig; | |
for(size_t i = 0; i < size; ++i) | |
orig.push_back(distribution(generator)); | |
return orig; | |
} | |
std::vector<double> make_orig_low_eq(std::size_t size) | |
{ | |
std::uniform_real_distribution<double> distribution; | |
std::vector<double> orig; | |
for(size_t i = 0; i < size; ++i) | |
orig.push_back(distribution(generator)); | |
orig.push_back(0); | |
orig.push_back(0); | |
return orig; | |
} | |
std::vector<double> make_orig_ints(std::size_t size, std::size_t range) | |
{ | |
std::uniform_int_distribution<int> distribution(0, range); | |
std::vector<double> orig; | |
for(size_t i = 0; i < size; ++i) | |
orig.push_back(distribution(generator)); | |
orig.push_back(0); | |
orig.push_back(0); | |
return orig; | |
} | |
void print_res( | |
const std::string &op, | |
const std::string &desc, | |
size_t i, | |
double time) | |
{ | |
std::cout << "test_res," << | |
op << "," << | |
desc << "," << | |
i << "," << time << std::endl; | |
} | |
void check_same( | |
const std::vector<double> &lhs, | |
const std::vector<double> &rhs) | |
{ | |
assert(std::is_permutation(std::begin(lhs), std::end(lhs), std::begin(rhs))); | |
} | |
void try_loop( | |
const std::string &desc, | |
std::function<std::vector<double>(std::size_t)> make_fn) | |
{ | |
for(size_t t = 50; t <= max_size; t *= 10) | |
{ | |
const std::vector<double> orig = make_fn(t); | |
auto v1 = orig; | |
smallest_dup_remover<double> r(t); | |
{ | |
for(size_t try_i = 0; try_i < num_tries; ++try_i) | |
{ | |
v1 = orig; | |
auto start = std::chrono::steady_clock::now(); | |
r(v1); | |
auto end = std::chrono::steady_clock::now(); | |
print_res("op_1", desc, t, (end - start).count()); | |
} | |
} | |
auto v2 = orig; | |
for(size_t try_i = 0; try_i < num_tries; ++try_i) | |
{ | |
v2 = orig; | |
auto start = std::chrono::steady_clock::now(); | |
eraseSmallestNonunique(v2); | |
auto end = std::chrono::steady_clock::now(); | |
print_res("op_2", desc, t, (end - start).count()); | |
} | |
auto v3 = orig; | |
for(size_t try_i = 0; try_i < num_tries; ++try_i) | |
{ | |
v3 = orig; | |
auto start = std::chrono::steady_clock::now(); | |
removeSmallestNonunique(v3); | |
auto end = std::chrono::steady_clock::now(); | |
print_res("op_3", desc, t, (end - start).count()); | |
} | |
check_same(v1, v2); | |
check_same(v2, v3); | |
} | |
} | |
int main() | |
{ | |
for(size_t i = 0; i < outer_tries; ++i) | |
{ | |
try_loop("completely_random", [](std::size_t t){return make_orig_completely_random(t);}); | |
try_loop("random_low_eq", [](std::size_t t){return make_orig_low_eq(t);}); | |
try_loop("fixed_100", [](std::size_t t){return make_orig_ints(t, 100);}); | |
try_loop("hinnant", [](std::size_t t){return make_orig_ints(t, 3 * t / 2);}); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment