Skip to content

Instantly share code, notes, and snippets.

@cesarkawakami
Created November 18, 2012 22:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cesarkawakami/4107959 to your computer and use it in GitHub Desktop.
Save cesarkawakami/4107959 to your computer and use it in GitHub Desktop.
Quick parallel out-of-place radix-sort implementation
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <pthread.h>
#include <random>
const int DATA_LENGTH = 1000000000;
typedef uint32_t number;
void print_time(const char* msg) {
static auto time_previous = std::chrono::high_resolution_clock::now();
auto time_now = std::chrono::high_resolution_clock::now();
printf("%s: %d ms\n",
msg,
(int)std::chrono::duration_cast<std::chrono::milliseconds>(time_now - time_previous).count());
time_previous = time_now;
}
void one_step_radix_sort(number *data, number *sorted_data, uint32_t *counts,
int length, int current_cut, int cut_step) {
memset(counts, 0, (1 << cut_step) * sizeof(uint32_t));
uint32_t mask = (1 << cut_step) - 1;
for (int i = 0; i < length; ++i) ++counts[(data[i] >> current_cut) & mask];
for (int i = 1; i < (1 << cut_step); ++i) counts[i] += counts[i - 1];
for (int i = length - 1; i >= 0; --i)
sorted_data[--counts[(data[i] >> current_cut) & mask]] = data[i];
}
void radix_sort(number *data, int length, int cut_step = 8) {
auto sorted_data = new number[length];
auto counts = new uint32_t[1 << cut_step];
for (int cut = 0; cut < 64; cut += cut_step) {
one_step_radix_sort(data, sorted_data, counts, length, cut, cut_step);
memcpy(data, sorted_data, length * sizeof(number));
}
delete sorted_data;
}
struct radix_sort_arguments {
number *data;
int length;
int cut_step;
};
void *one_arg_radix_sort(void *arg) {
radix_sort_arguments arguments = *((radix_sort_arguments*) arg);
radix_sort(arguments.data, arguments.length, arguments.cut_step);
return NULL;
}
void parallel_radix_sort(number *data, int length, int front_cut = 3) {
auto sorted_data = new number[length];
uint32_t counts[(1 << front_cut) + 1];
pthread_t threads[1 << front_cut];
one_step_radix_sort(data, sorted_data, counts, length, 64 - front_cut, front_cut);
memcpy(data, sorted_data, length * sizeof(number));
delete sorted_data;
counts[1 << front_cut] = length;
radix_sort_arguments arguments[1 << front_cut];
for (int i = 0; i < (1 << front_cut); ++i) {
arguments[i] = (radix_sort_arguments) {
data + counts[i],
(int) (counts[i + 1] - counts[i]),
8
};
pthread_create(&threads[i],
NULL,
one_arg_radix_sort,
&arguments[i]);
}
for (int i = 0; i < (1 << front_cut); ++i)
pthread_join(threads[i], NULL);
}
int main() {
auto data = new number[DATA_LENGTH];
auto data2 = new number[DATA_LENGTH];
auto data3 = new number[DATA_LENGTH];
print_time("start");
std::minstd_rand rng;
std::uniform_int_distribution<number> uniform_uint64;
for (int i = 0; i < DATA_LENGTH; ++i)
data[i] = uniform_uint64(rng);
print_time("random number generation");
memcpy(data2, data, DATA_LENGTH * sizeof(number));
memcpy(data3, data, DATA_LENGTH * sizeof(number));
print_time("data copy");
parallel_radix_sort(data, DATA_LENGTH);
print_time("parallel sort");
radix_sort(data2, DATA_LENGTH);
print_time("sequential sort");
std::sort(data3, data3 + DATA_LENGTH);
print_time("C++ sort");
for (int i = 0; i < DATA_LENGTH; ++i)
if (data[i] != data2[i])
std::cout << "FAILURE: " << data[i] << " != " << data2[i] << std::endl;
for (int i = 0; i < DATA_LENGTH; ++i)
if (data[i] != data3[i])
std::cout << "FAILURE: " << data[i] << " != " << data3[i] << std::endl;
print_time("checking");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment