Skip to content

Instantly share code, notes, and snippets.

@nathan-russell
Created May 3, 2016 22:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nathan-russell/711932958187918215091d0310c99028 to your computer and use it in GitHub Desktop.
Save nathan-russell/711932958187918215091d0310c99028 to your computer and use it in GitHub Desktop.
// [[Rcpp::plugins(cpp11)]]
#include <Rcpp.h>
#include <utility>
#include <queue>
class histogram {
private:
struct paired {
typedef std::pair<double, unsigned int> pair_t;
pair_t pair;
unsigned int is_set;
paired()
: pair(pair_t()),
is_set(0)
{}
paired(double x)
: pair(std::make_pair(x, 1)),
is_set(1)
{}
bool operator==(const paired& other) const {
return pair.first == other.pair.first;
}
bool operator==(double other) const {
return is_set && (pair.first == other);
}
bool operator>(double other) const {
return is_set && (pair.first > other);
}
bool operator<(double other) const {
return is_set && (pair.first < other);
}
paired& operator++() {
++pair.second;
return *this;
}
paired operator++(int) {
paired tmp(*this);
++(*this);
return tmp;
}
};
struct greater {
bool operator()(const paired& lhs, const paired& rhs) const {
if (!lhs.is_set) return false;
if (!rhs.is_set) return true;
return lhs.pair.first > rhs.pair.first;
}
};
typedef std::priority_queue<
paired,
std::vector<paired>,
greater
> queue_t;
unsigned int sz;
queue_t queue;
void insert(double x) {
if (queue.empty()) {
queue.emplace(x);
return;
}
if (queue.top() > x && queue.size() >= sz) return;
queue_t qtmp;
bool matched = false;
while (queue.size()) {
paired elem = queue.top();
if (elem == x) {
qtmp.emplace(std::move(++elem));
matched = true;
} else {
qtmp.emplace(std::move(elem));
}
queue.pop();
}
if (!matched) {
if (qtmp.size() >= sz) qtmp.pop();
qtmp.emplace(x);
}
std::swap(queue, qtmp);
}
public:
histogram(unsigned int sz_)
: sz(sz_),
queue(queue_t())
{}
template <typename InputIt>
void insert(InputIt first, InputIt last) {
while (first != last) insert(*first++);
}
Rcpp::List get() const {
Rcpp::NumericVector values(sz);
Rcpp::IntegerVector freq(sz);
R_xlen_t i = 0;
queue_t tmp(queue);
while (tmp.size()) {
values[i] = tmp.top().pair.first;
freq[i] = tmp.top().pair.second;
++i;
tmp.pop();
}
return Rcpp::List::create(
Rcpp::Named("value") = values,
Rcpp::Named("frequency") = freq);
}
};
// [[Rcpp::export]]
Rcpp::List top_n2(Rcpp::NumericVector x, int n = 5) {
histogram h(n);
h.insert(x.begin(), x.end());
return h.get();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment