Last active
August 29, 2015 14:03
-
-
Save Lazin/d23d94ebc36f8a8542f2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <tuple> | |
#include <vector> | |
#include <algorithm> | |
#include <boost/timer.hpp> | |
#include <boost/range/iterator_range.hpp> | |
#include <boost/heap/skew_heap.hpp> | |
#include <google/profiler.h> | |
using namespace std; | |
const unsigned NUM_WAYS = 1024; | |
const unsigned NUM_ITERATIONS = 100000; | |
typedef std::tuple<uint64_t, uint32_t> TKey; | |
typedef std::vector<TKey> SortedRun; | |
typedef std::vector<SortedRun> Dataset; | |
void create_dataset(Dataset& ds) { | |
ds.resize(NUM_WAYS); | |
for (unsigned int i = 0; i < NUM_WAYS; i++) { | |
ds[i].resize(NUM_ITERATIONS); | |
for (unsigned j = 0; j < NUM_ITERATIONS; j++) | |
ds[i][j] = std::make_tuple(i + j * NUM_WAYS, 0); | |
} | |
} | |
void binary_merge(Dataset const& ds, int x, int y, SortedRun& out) { | |
out.reserve(ds[x].size() * 2); | |
std::merge(ds[x].begin(), ds[x].end(), ds[y].begin(), ds[y].end(), std::back_inserter(out), std::less<TKey>()); | |
} | |
void binary_merge(Dataset const& ds, Dataset& ds_out) { | |
for (auto i = 0u; i < ds_out.size(); i++) { | |
binary_merge(ds, i*2, i*2+1, ds_out[i]); | |
} | |
} | |
void binary_merge(Dataset& ds, SortedRun& out) { | |
Dataset tmp; | |
while(ds.size() != 1) { | |
tmp.clear(); | |
tmp.resize(ds.size() / 2); | |
binary_merge(ds, tmp); | |
std::swap(ds, tmp); | |
} | |
std::swap(out, ds[0]); | |
} | |
void kway_merge(Dataset& ds, SortedRun& out) { | |
typedef SortedRun::const_iterator iter_t; | |
std::vector<boost::iterator_range<iter_t>> ranges; | |
for (auto const& run: ds) { | |
ranges.push_back(boost::make_iterator_range(run)); | |
} | |
typedef std::tuple<TKey, int> HeapItem; | |
struct HeapOrder { | |
bool operator() (HeapItem const& lhs, HeapItem const& rhs) const { | |
return std::get<0>(lhs) > std::get<0>(rhs); | |
} | |
}; | |
typedef boost::heap::skew_heap<HeapItem, boost::heap::compare<HeapOrder>> Heap; | |
Heap heap; | |
int index = 0; | |
for (auto& range: ranges) { | |
if (range) { | |
auto value = range.front(); | |
range.advance_begin(1); | |
heap.push(std::make_tuple(value, index)); | |
index++; | |
} | |
} | |
while(!heap.empty()) { | |
auto item = heap.top(); | |
auto point = std::get<0>(item); | |
int index = std::get<1>(item); | |
out.push_back(point); | |
heap.pop(); | |
if (ranges[index]) { | |
auto value = ranges[index].front(); | |
ranges[index].advance_begin(1); | |
heap.push(std::make_tuple(value, index)); | |
} | |
} | |
} | |
struct RangeGroup { | |
size_t size; | |
vector<vector<TKey> > rs; | |
vector<size_t> ps; | |
void reset() { | |
for (size_t i = 0; i < ps.size(); i++) | |
ps[i] = 0; | |
} | |
TKey get_next(size_t i) { | |
assert (i < rs.size()); | |
auto res = std::make_tuple(~0UL, ~0U); | |
if (ps[i] < rs[i].size()) { | |
res = rs[i][ps[i]]; | |
ps[i]++; | |
} | |
return res; | |
} | |
void init_bad(size_t n, size_t m) { | |
rs.resize(n); | |
ps.resize(n); | |
for (unsigned int i = 0; i < n; i++) { | |
rs[i].resize(m); | |
for (unsigned j = 0; j < m; j++) | |
rs[i][j] = std::make_tuple((i + j * n), 0u); | |
} | |
} | |
}; | |
class MergerHeap | |
{ | |
struct Node { | |
TKey v; | |
unsigned int idx, fill; | |
}; | |
vector<Node> nodes; | |
void drawn_node(unsigned int i, unsigned int n) | |
{ | |
assert (i < n); | |
auto v = nodes[i].v; | |
unsigned int idx = nodes[i].idx; | |
for (;;) { | |
unsigned int ic = 2 * i + 1; | |
if (ic >= n) | |
break; | |
ic += (nodes[ic].v > nodes[ic + 1].v); | |
if (v <= nodes[ic].v) | |
break; | |
nodes[i] = nodes[ic]; | |
i = ic; | |
} | |
nodes[i].v = v; | |
nodes[i].idx = idx; | |
} | |
void heapify() | |
{ | |
unsigned int n = nodes.size() - 1; | |
for (unsigned int i = n; i > 0;) { | |
drawn_node(--i, n); | |
} | |
} | |
public: | |
void merge(RangeGroup *rg, TKey *dst) | |
{ | |
unsigned int n = rg->rs.size(); | |
rg->reset(); | |
assert (n >= 2); | |
nodes.resize(n + 1); | |
nodes[n].v = std::make_tuple(~0UL, ~0U); | |
nodes[n].idx = ~0; | |
for (unsigned int i = 0; i < n; i++) { | |
nodes[i].idx = i; | |
nodes[i].v = rg->get_next(i); | |
} | |
heapify(); | |
for (;;) { | |
unsigned int ic = 1 + (nodes[1].v > nodes[2].v); | |
auto v = nodes[0].v; | |
unsigned int idx = nodes[0].idx; | |
auto nv = nodes[ic].v; | |
const auto MAX_VAL = std::make_tuple(~0UL, ~0U); | |
for (;;) { | |
if (v == MAX_VAL) | |
return; | |
*dst++ = v; | |
v = rg->get_next(idx); | |
if (v > nv) | |
break; | |
} | |
unsigned int i = 0; | |
nodes[0].v = nv; | |
nodes[0].idx = nodes[ic].idx; | |
i = ic; | |
for (;;) { | |
ic = 2 * i + 1; | |
if (ic >= n) | |
break; | |
ic += (nodes[ic].v > nodes[ic + 1].v); | |
if (v <= nodes[ic].v) | |
break; | |
nodes[i].v = nodes[ic].v; | |
nodes[i].idx = nodes[ic].idx; | |
i = ic; | |
} | |
nodes[i].v = v; | |
nodes[i].idx = idx; | |
} | |
} | |
}; | |
int main(int cnt, const char** args) | |
{ | |
// Binary merge | |
{ | |
Dataset ds; | |
SortedRun out; | |
create_dataset(ds); | |
boost::timer tm; | |
binary_merge(ds, out); | |
auto elapsed = tm.elapsed(); | |
std::cout << "binary merge " << elapsed << std::endl; | |
} | |
// K-Way merge | |
{ | |
Dataset ds; | |
SortedRun out; | |
create_dataset(ds); | |
boost::timer tm; | |
kway_merge(ds, out); | |
auto elapsed = tm.elapsed(); | |
std::cout << "K-way merge " << elapsed << std::endl; | |
} | |
// | |
{ | |
TKey* dst = new TKey[NUM_WAYS*NUM_ITERATIONS]; | |
RangeGroup rgroup; | |
rgroup.init_bad(NUM_WAYS, NUM_ITERATIONS); | |
MergerHeap mheap; | |
boost::timer tm; | |
mheap.merge(&rgroup, dst); | |
auto elapsed = tm.elapsed(); | |
std::cout << "MergerHeap " << elapsed << std::endl; | |
delete dst; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment