Skip to content

Instantly share code, notes, and snippets.

@Lazin
Last active August 29, 2015 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Lazin/d23d94ebc36f8a8542f2 to your computer and use it in GitHub Desktop.
Save Lazin/d23d94ebc36f8a8542f2 to your computer and use it in GitHub Desktop.
#include <iostream>
#include <tuple>
#include <vector>
#include <algorithm>
#include <boost/timer.hpp>
#include <boost/range/iterator_range.hpp>
#include <boost/heap/skew_heap.hpp>
#include <google/profiler.h>
using namespace std;
const unsigned NUM_WAYS = 1024;
const unsigned NUM_ITERATIONS = 100000;
typedef std::tuple<uint64_t, uint32_t> TKey;
typedef std::vector<TKey> SortedRun;
typedef std::vector<SortedRun> Dataset;
void create_dataset(Dataset& ds) {
ds.resize(NUM_WAYS);
for (unsigned int i = 0; i < NUM_WAYS; i++) {
ds[i].resize(NUM_ITERATIONS);
for (unsigned j = 0; j < NUM_ITERATIONS; j++)
ds[i][j] = std::make_tuple(i + j * NUM_WAYS, 0);
}
}
void binary_merge(Dataset const& ds, int x, int y, SortedRun& out) {
out.reserve(ds[x].size() * 2);
std::merge(ds[x].begin(), ds[x].end(), ds[y].begin(), ds[y].end(), std::back_inserter(out), std::less<TKey>());
}
void binary_merge(Dataset const& ds, Dataset& ds_out) {
for (auto i = 0u; i < ds_out.size(); i++) {
binary_merge(ds, i*2, i*2+1, ds_out[i]);
}
}
void binary_merge(Dataset& ds, SortedRun& out) {
Dataset tmp;
while(ds.size() != 1) {
tmp.clear();
tmp.resize(ds.size() / 2);
binary_merge(ds, tmp);
std::swap(ds, tmp);
}
std::swap(out, ds[0]);
}
void kway_merge(Dataset& ds, SortedRun& out) {
typedef SortedRun::const_iterator iter_t;
std::vector<boost::iterator_range<iter_t>> ranges;
for (auto const& run: ds) {
ranges.push_back(boost::make_iterator_range(run));
}
typedef std::tuple<TKey, int> HeapItem;
struct HeapOrder {
bool operator() (HeapItem const& lhs, HeapItem const& rhs) const {
return std::get<0>(lhs) > std::get<0>(rhs);
}
};
typedef boost::heap::skew_heap<HeapItem, boost::heap::compare<HeapOrder>> Heap;
Heap heap;
int index = 0;
for (auto& range: ranges) {
if (range) {
auto value = range.front();
range.advance_begin(1);
heap.push(std::make_tuple(value, index));
index++;
}
}
while(!heap.empty()) {
auto item = heap.top();
auto point = std::get<0>(item);
int index = std::get<1>(item);
out.push_back(point);
heap.pop();
if (ranges[index]) {
auto value = ranges[index].front();
ranges[index].advance_begin(1);
heap.push(std::make_tuple(value, index));
}
}
}
struct RangeGroup {
size_t size;
vector<vector<TKey> > rs;
vector<size_t> ps;
void reset() {
for (size_t i = 0; i < ps.size(); i++)
ps[i] = 0;
}
TKey get_next(size_t i) {
assert (i < rs.size());
auto res = std::make_tuple(~0UL, ~0U);
if (ps[i] < rs[i].size()) {
res = rs[i][ps[i]];
ps[i]++;
}
return res;
}
void init_bad(size_t n, size_t m) {
rs.resize(n);
ps.resize(n);
for (unsigned int i = 0; i < n; i++) {
rs[i].resize(m);
for (unsigned j = 0; j < m; j++)
rs[i][j] = std::make_tuple((i + j * n), 0u);
}
}
};
class MergerHeap
{
struct Node {
TKey v;
unsigned int idx, fill;
};
vector<Node> nodes;
void drawn_node(unsigned int i, unsigned int n)
{
assert (i < n);
auto v = nodes[i].v;
unsigned int idx = nodes[i].idx;
for (;;) {
unsigned int ic = 2 * i + 1;
if (ic >= n)
break;
ic += (nodes[ic].v > nodes[ic + 1].v);
if (v <= nodes[ic].v)
break;
nodes[i] = nodes[ic];
i = ic;
}
nodes[i].v = v;
nodes[i].idx = idx;
}
void heapify()
{
unsigned int n = nodes.size() - 1;
for (unsigned int i = n; i > 0;) {
drawn_node(--i, n);
}
}
public:
void merge(RangeGroup *rg, TKey *dst)
{
unsigned int n = rg->rs.size();
rg->reset();
assert (n >= 2);
nodes.resize(n + 1);
nodes[n].v = std::make_tuple(~0UL, ~0U);
nodes[n].idx = ~0;
for (unsigned int i = 0; i < n; i++) {
nodes[i].idx = i;
nodes[i].v = rg->get_next(i);
}
heapify();
for (;;) {
unsigned int ic = 1 + (nodes[1].v > nodes[2].v);
auto v = nodes[0].v;
unsigned int idx = nodes[0].idx;
auto nv = nodes[ic].v;
const auto MAX_VAL = std::make_tuple(~0UL, ~0U);
for (;;) {
if (v == MAX_VAL)
return;
*dst++ = v;
v = rg->get_next(idx);
if (v > nv)
break;
}
unsigned int i = 0;
nodes[0].v = nv;
nodes[0].idx = nodes[ic].idx;
i = ic;
for (;;) {
ic = 2 * i + 1;
if (ic >= n)
break;
ic += (nodes[ic].v > nodes[ic + 1].v);
if (v <= nodes[ic].v)
break;
nodes[i].v = nodes[ic].v;
nodes[i].idx = nodes[ic].idx;
i = ic;
}
nodes[i].v = v;
nodes[i].idx = idx;
}
}
};
int main(int cnt, const char** args)
{
// Binary merge
{
Dataset ds;
SortedRun out;
create_dataset(ds);
boost::timer tm;
binary_merge(ds, out);
auto elapsed = tm.elapsed();
std::cout << "binary merge " << elapsed << std::endl;
}
// K-Way merge
{
Dataset ds;
SortedRun out;
create_dataset(ds);
boost::timer tm;
kway_merge(ds, out);
auto elapsed = tm.elapsed();
std::cout << "K-way merge " << elapsed << std::endl;
}
//
{
TKey* dst = new TKey[NUM_WAYS*NUM_ITERATIONS];
RangeGroup rgroup;
rgroup.init_bad(NUM_WAYS, NUM_ITERATIONS);
MergerHeap mheap;
boost::timer tm;
mheap.merge(&rgroup, dst);
auto elapsed = tm.elapsed();
std::cout << "MergerHeap " << elapsed << std::endl;
delete dst;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment