Last active
January 14, 2016 16:02
-
-
Save kumagi/7cdf5a55a3f481c76bbf to your computer and use it in GitHub Desktop.
速くならん
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Created by kumagi on 16/01/13. | |
// | |
#ifndef BETASORT_RADIX_SORT_HPP | |
#define BETASORT_RADIX_SORT_HPP | |
#define _GNU_SOURCE 1 | |
#include <sched.h> | |
#include <assert.h> | |
#include <memory.h> | |
#include <cstdint> | |
#include <cstddef> | |
#include <array> | |
#include <iostream> | |
#include <vector> | |
#include <iterator> | |
void set_affinity(int i) { | |
cpu_set_t mask; | |
CPU_ZERO(&mask); | |
CPU_SET(i, &mask); | |
if (::sched_setaffinity(0, sizeof(mask), &mask) == -1) { | |
::perror("failed to setaffinity:"); | |
exit(1); | |
} | |
} | |
void radix_sort(const std::vector<uint64_t>::iterator& begin, | |
const std::vector<uint64_t>::iterator& end) { | |
const auto size = std::distance(begin, end); | |
set_affinity(0); | |
std::array<std::vector<uint64_t>, 256> buffers; | |
for (auto &buffer : buffers) { | |
buffer.reserve(static_cast<size_t>(size)); | |
} | |
for (int i = 0; i < 64; i += 8) { | |
for (auto pos = begin; pos != end; ++pos) { | |
buffers[(*pos >> i) & 255].emplace_back(std::move(*pos)); | |
} | |
auto pos = begin; | |
for (auto &buffer : buffers) { | |
for (auto &item : buffer) { | |
*pos++ = std::move(item); | |
} | |
buffer.clear(); | |
} | |
} | |
} | |
void radix_sort_wc(const std::vector<uint64_t>::iterator& begin, | |
const std::vector<uint64_t>::iterator& end) { | |
set_affinity(0); | |
const auto size = std::distance(begin, end); | |
constexpr int width = 8; | |
constexpr int nbuckets = 1 << width; | |
std::array<std::vector<uint64_t>, nbuckets> buffers; | |
for (auto &buffer : buffers) { | |
buffer.reserve(static_cast<size_t>(size)); | |
} | |
char working_space[nbuckets * 64 + 63]; | |
std::array<std::array<uint64_t, 8>, nbuckets>& wbuffer | |
= *reinterpret_cast<std::array<std::array<uint64_t, 8>, nbuckets>*> | |
((reinterpret_cast<intptr_t>(working_space) + 63) & ~63llu); | |
std::array<uint8_t, nbuckets> wbuff_n = {0}; | |
static_assert(64 % width == 0, "width must divide whole bits"); | |
for (int i = 0; i < 64; i += 8) { | |
for (auto pos = begin; pos != end; ++pos) { | |
uint8_t idx = static_cast<uint8_t>((*pos >> i) & (nbuckets - 1)); | |
wbuffer[idx][wbuff_n[idx]++] = *pos; | |
if (wbuff_n[idx] == wbuffer[idx].size()) { // reached cache line size | |
for (auto &&entry : wbuffer[idx]) { | |
buffers[idx].emplace_back(std::move(entry)); | |
} | |
wbuff_n[idx] = 0; | |
} | |
} | |
auto pos = begin; | |
for (size_t j = 0; j < buffers.size(); ++j) { | |
for (auto &&item : buffers[j]) { | |
*pos++ = std::move(item); | |
} | |
buffers[j].clear(); | |
for (size_t k = 0; k < wbuff_n[j]; ++k) { | |
*pos++ = std::move(wbuffer[j][k]); | |
} | |
wbuff_n[j] = 0; | |
} | |
} | |
} | |
void radix_sort_wc_opt(const std::vector<uint64_t>::iterator& begin, | |
const std::vector<uint64_t>::iterator& end) { | |
const auto size = std::distance(begin, end); | |
constexpr int width = 8; | |
constexpr int nbuckets = 1 << width; | |
std::array<std::vector<uint64_t>, nbuckets> buffers; | |
std::array<uint64_t, nbuckets> buffer_n; | |
for (size_t i = 0; i < nbuckets; ++i) { | |
buffers[i].reserve(static_cast<size_t>(size)); | |
buffer_n[i] = 0; | |
} | |
std::array<std::array<uint64_t, 8>, nbuckets> wbuffer; | |
std::array<uint8_t, nbuckets> wbuff_n; | |
for (size_t j = 0; j < wbuff_n.size(); ++j) { | |
wbuff_n[j] = 0; | |
} | |
static_assert(64 % width == 0, "width must divide whole bits"); | |
for (int i = 0; i < 64; i += width) { | |
for (auto pos = begin; pos != end; ++pos) { | |
uint8_t idx = static_cast<uint8_t>((*pos >> i) & (nbuckets - 1)); | |
wbuffer[idx][wbuff_n[idx]++] = *pos; | |
if (wbuff_n[idx] == wbuffer[idx].size()) { // reached cache line size | |
buffers[idx].resize(buffers[idx].size() + 8); | |
::memcpy(&buffers[idx][buffer_n[idx]], wbuffer[idx].data(), 64); | |
buffer_n[idx] += 8; | |
wbuff_n[idx] = 0; | |
} | |
} | |
for (size_t j = 0; j < wbuff_n.size(); ++j) { | |
::memcpy(&buffers[j][buffer_n[j]], wbuffer[j].data(), wbuff_n[j] * 8); | |
buffer_n[j] += wbuff_n[j]; | |
wbuff_n[j] = 0; | |
} | |
auto pos = begin; | |
for (size_t j = 0; j < buffers.size(); ++j) { | |
::memcpy(&*pos, buffers[j].data(), buffer_n[j] * 8); | |
pos += buffer_n[j]; | |
buffer_n[j] = 0; | |
} | |
} | |
} | |
#endif //BETASORT_RADIX_SORT_HPP |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment