Skip to content

Instantly share code, notes, and snippets.

@kumagi
Last active January 14, 2016 16:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kumagi/7cdf5a55a3f481c76bbf to your computer and use it in GitHub Desktop.
Save kumagi/7cdf5a55a3f481c76bbf to your computer and use it in GitHub Desktop.
速くならん
//
// Created by kumagi on 16/01/13.
//
#ifndef BETASORT_RADIX_SORT_HPP
#define BETASORT_RADIX_SORT_HPP
#define _GNU_SOURCE 1
#include <sched.h>
#include <assert.h>
#include <memory.h>
#include <cstdint>
#include <cstddef>
#include <array>
#include <iostream>
#include <vector>
#include <iterator>
void set_affinity(int i) {
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(i, &mask);
if (::sched_setaffinity(0, sizeof(mask), &mask) == -1) {
::perror("failed to setaffinity:");
exit(1);
}
}
void radix_sort(const std::vector<uint64_t>::iterator& begin,
const std::vector<uint64_t>::iterator& end) {
const auto size = std::distance(begin, end);
set_affinity(0);
std::array<std::vector<uint64_t>, 256> buffers;
for (auto &buffer : buffers) {
buffer.reserve(static_cast<size_t>(size));
}
for (int i = 0; i < 64; i += 8) {
for (auto pos = begin; pos != end; ++pos) {
buffers[(*pos >> i) & 255].emplace_back(std::move(*pos));
}
auto pos = begin;
for (auto &buffer : buffers) {
for (auto &item : buffer) {
*pos++ = std::move(item);
}
buffer.clear();
}
}
}
void radix_sort_wc(const std::vector<uint64_t>::iterator& begin,
const std::vector<uint64_t>::iterator& end) {
set_affinity(0);
const auto size = std::distance(begin, end);
constexpr int width = 8;
constexpr int nbuckets = 1 << width;
std::array<std::vector<uint64_t>, nbuckets> buffers;
for (auto &buffer : buffers) {
buffer.reserve(static_cast<size_t>(size));
}
char working_space[nbuckets * 64 + 63];
std::array<std::array<uint64_t, 8>, nbuckets>& wbuffer
= *reinterpret_cast<std::array<std::array<uint64_t, 8>, nbuckets>*>
((reinterpret_cast<intptr_t>(working_space) + 63) & ~63llu);
std::array<uint8_t, nbuckets> wbuff_n = {0};
static_assert(64 % width == 0, "width must divide whole bits");
for (int i = 0; i < 64; i += 8) {
for (auto pos = begin; pos != end; ++pos) {
uint8_t idx = static_cast<uint8_t>((*pos >> i) & (nbuckets - 1));
wbuffer[idx][wbuff_n[idx]++] = *pos;
if (wbuff_n[idx] == wbuffer[idx].size()) { // reached cache line size
for (auto &&entry : wbuffer[idx]) {
buffers[idx].emplace_back(std::move(entry));
}
wbuff_n[idx] = 0;
}
}
auto pos = begin;
for (size_t j = 0; j < buffers.size(); ++j) {
for (auto &&item : buffers[j]) {
*pos++ = std::move(item);
}
buffers[j].clear();
for (size_t k = 0; k < wbuff_n[j]; ++k) {
*pos++ = std::move(wbuffer[j][k]);
}
wbuff_n[j] = 0;
}
}
}
void radix_sort_wc_opt(const std::vector<uint64_t>::iterator& begin,
const std::vector<uint64_t>::iterator& end) {
const auto size = std::distance(begin, end);
constexpr int width = 8;
constexpr int nbuckets = 1 << width;
std::array<std::vector<uint64_t>, nbuckets> buffers;
std::array<uint64_t, nbuckets> buffer_n;
for (size_t i = 0; i < nbuckets; ++i) {
buffers[i].reserve(static_cast<size_t>(size));
buffer_n[i] = 0;
}
std::array<std::array<uint64_t, 8>, nbuckets> wbuffer;
std::array<uint8_t, nbuckets> wbuff_n;
for (size_t j = 0; j < wbuff_n.size(); ++j) {
wbuff_n[j] = 0;
}
static_assert(64 % width == 0, "width must divide whole bits");
for (int i = 0; i < 64; i += width) {
for (auto pos = begin; pos != end; ++pos) {
uint8_t idx = static_cast<uint8_t>((*pos >> i) & (nbuckets - 1));
wbuffer[idx][wbuff_n[idx]++] = *pos;
if (wbuff_n[idx] == wbuffer[idx].size()) { // reached cache line size
buffers[idx].resize(buffers[idx].size() + 8);
::memcpy(&buffers[idx][buffer_n[idx]], wbuffer[idx].data(), 64);
buffer_n[idx] += 8;
wbuff_n[idx] = 0;
}
}
for (size_t j = 0; j < wbuff_n.size(); ++j) {
::memcpy(&buffers[j][buffer_n[j]], wbuffer[j].data(), wbuff_n[j] * 8);
buffer_n[j] += wbuff_n[j];
wbuff_n[j] = 0;
}
auto pos = begin;
for (size_t j = 0; j < buffers.size(); ++j) {
::memcpy(&*pos, buffers[j].data(), buffer_n[j] * 8);
pos += buffer_n[j];
buffer_n[j] = 0;
}
}
}
#endif //BETASORT_RADIX_SORT_HPP
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment