Skip to content

Instantly share code, notes, and snippets.

@atavory
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atavory/97d94485e48e80b54123 to your computer and use it in GitHub Desktop.
Save atavory/97d94485e48e80b54123 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <vector>
#include <functional>
#include <algorithm>
#include <random>
#include <chrono>
#include <cassert>
#include <functional>
const std::size_t outer_tries = 3;
const size_t max_size = 50000;
const size_t num_tries = 3;
template<typename T, class Hash=std::hash<T>>
class smallest_dup_remover
{
public:
explicit smallest_dup_remover(std::size_t max_size)
{
while(m_mask < max_size)
m_mask *= 2;
m_status.resize(m_mask);
m_vals.resize(m_mask);
--m_mask;
}
void operator()(std::vector<T> &vals)
{
std::fill(std::begin(m_status), std::end(m_status), 0);
bool has = false;
T min_;
std::vector<T> spillover;
spillover.reserve(vals.size());
for(auto v: vals)
{
const std::size_t pos = m_hash(v) & m_mask;
char &status = m_status[pos];
switch(status)
{
case 0:
status = 1;
m_vals[pos] = v;
break;
case 1:
if(m_vals[pos] == v)
{
status = 2;
min_ = has? std::min(min_, v): v;
has = true;
}
else
spillover.push_back(v);
break;
case 2:
if(m_vals[pos] != v)
spillover.push_back(v);
}
}
std::sort(std::begin(spillover), std::end(spillover));
auto it = std::adjacent_find(std::begin(spillover), std::end(spillover));
if(has && it == std::end(spillover))
remove_min(vals, min_);
else if(has && it != std::end(spillover))
remove_min(vals, std::min(min_, *it));
else if(!has && it != std::end(spillover))
remove_min(vals, *it);
}
private:
void remove_min(std::vector<T> &vals, T t)
{
vals.erase(std::find(vals.begin(), vals.end(), t));
}
private:
size_t m_mask = 1;
std::vector<char> m_status;
std::vector<T> m_vals;
Hash m_hash;
};
template <typename Container>
void
eraseSmallestNonunique(Container& c)
{
std::sort(std::begin(c), std::end(c));
auto it = std::adjacent_find(std::begin(c), std::end(c));
if (it != std::end(c))
c.erase(it);
}
template <typename Container>
void
removeSmallestNonunique(Container& c)
{
using value_type = typename Container::value_type;
if (c.size() > 1)
{
std::make_heap(c.begin(), c.end(), std::greater<value_type>{});
std::pop_heap(c.begin(), c.end(), std::greater<value_type>{});
for (auto e = std::prev(c.end()); e != c.begin(); --e)
{
std::pop_heap(c.begin(), e, std::greater<value_type>{});
if (*e == e[-1])
{
c.erase(e);
break;
}
}
}
}
std::mt19937 generator;
std::vector<double> make_orig_completely_random(std::size_t size)
{
std::uniform_real_distribution<double> distribution;
std::vector<double> orig;
for(size_t i = 0; i < size; ++i)
orig.push_back(distribution(generator));
return orig;
}
std::vector<double> make_orig_low_eq(std::size_t size)
{
std::uniform_real_distribution<double> distribution;
std::vector<double> orig;
for(size_t i = 0; i < size; ++i)
orig.push_back(distribution(generator));
orig.push_back(0);
orig.push_back(0);
return orig;
}
std::vector<double> make_orig_ints(std::size_t size, std::size_t range)
{
std::uniform_int_distribution<int> distribution(0, range);
std::vector<double> orig;
for(size_t i = 0; i < size; ++i)
orig.push_back(distribution(generator));
orig.push_back(0);
orig.push_back(0);
return orig;
}
void print_res(
const std::string &op,
const std::string &desc,
size_t i,
double time)
{
std::cout << "test_res," <<
op << "," <<
desc << "," <<
i << "," << time << std::endl;
}
void check_same(
const std::vector<double> &lhs,
const std::vector<double> &rhs)
{
assert(std::is_permutation(std::begin(lhs), std::end(lhs), std::begin(rhs)));
}
void try_loop(
const std::string &desc,
std::function<std::vector<double>(std::size_t)> make_fn)
{
for(size_t t = 50; t <= max_size; t *= 10)
{
const std::vector<double> orig = make_fn(t);
auto v1 = orig;
smallest_dup_remover<double> r(t);
{
for(size_t try_i = 0; try_i < num_tries; ++try_i)
{
v1 = orig;
auto start = std::chrono::steady_clock::now();
r(v1);
auto end = std::chrono::steady_clock::now();
print_res("op_1", desc, t, (end - start).count());
}
}
auto v2 = orig;
for(size_t try_i = 0; try_i < num_tries; ++try_i)
{
v2 = orig;
auto start = std::chrono::steady_clock::now();
eraseSmallestNonunique(v2);
auto end = std::chrono::steady_clock::now();
print_res("op_2", desc, t, (end - start).count());
}
auto v3 = orig;
for(size_t try_i = 0; try_i < num_tries; ++try_i)
{
v3 = orig;
auto start = std::chrono::steady_clock::now();
removeSmallestNonunique(v3);
auto end = std::chrono::steady_clock::now();
print_res("op_3", desc, t, (end - start).count());
}
check_same(v1, v2);
check_same(v2, v3);
}
}
int main()
{
for(size_t i = 0; i < outer_tries; ++i)
{
try_loop("completely_random", [](std::size_t t){return make_orig_completely_random(t);});
try_loop("random_low_eq", [](std::size_t t){return make_orig_low_eq(t);});
try_loop("fixed_100", [](std::size_t t){return make_orig_ints(t, 100);});
try_loop("hinnant", [](std::size_t t){return make_orig_ints(t, 3 * t / 2);});
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment