Skip to content

Instantly share code, notes, and snippets.

@qtxie
Created September 14, 2019 19:20
Show Gist options
  • Save qtxie/12c087db8a2c0c2d9fe0104af2f4236e to your computer and use it in GitHub Desktop.
Save qtxie/12c087db8a2c0c2d9fe0104af2f4236e to your computer and use it in GitHub Desktop.
#include <random>
#include <ctime>
#include <vector>
#include <iostream>
#include <chrono>
#include <utility>
#include <array>
#include <type_traits>
#include <functional>
#include <string>
#include <stdlib.h>
#include "pdqsort.h"
//#include "timsort.h"
#ifdef _WIN32
#include <intrin.h>
#define rdtsc __rdtsc
#else
#ifdef __i586__
static __inline__ unsigned long long rdtsc() {
unsigned long long int x;
__asm__ volatile(".byte 0x0f, 0x31" : "=A" (x));
return x;
}
#elif defined(__x86_64__)
static __inline__ unsigned long long rdtsc() {
unsigned hi, lo;
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
return ((unsigned long long) lo) | (((unsigned long long) hi) << 32);
}
#else
#error no rdtsc implementation
#endif
#endif
struct Cell {
int header;
int pading;
int value;
int pading2;
Cell(int x) : header(0), value(x) {}
int compareCell( Cell const & l, const Cell& r) const {
return l.value - r.value;
}
int dispatchCompare(Cell const& l, const Cell& r) const {
if (l.header == r.header && l.header == 0) {
return compareCell(l, r);
}
}
bool operator<(const Cell& r) const
{
return 0 > dispatchCompare(*this, r);
}
};
static char* med3(char*, char*, char*, int (*)(const void*, const void*));
static void swapfunc(char*, char*, int, int);
#define min(a, b) (a) < (b) ? a : b
/*
* Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
*/
#define swapcode(TYPE, parmi, parmj, n) { \
long i = (n) / sizeof (TYPE); \
TYPE *pi = (TYPE *) (parmi); \
TYPE *pj = (TYPE *) (parmj); \
do { \
TYPE t = *pi; \
*pi++ = *pj; \
*pj++ = t; \
} while (--i > 0); \
}
#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
static void
swapfunc(char* a, char* b, int n, int swaptype)
{
if (swaptype <= 1)
swapcode(long, a, b, n)
else
swapcode(char, a, b, n)
}
#define swap(a, b) \
if (swaptype == 0) { \
long t = *(long *)(a); \
*(long *)(a) = *(long *)(b); \
*(long *)(b) = t; \
} else \
swapfunc(a, b, es, swaptype)
#define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype)
static char*
med3(char* a, char* b, char* c, int (*cmp)(const void*, const void*))
{
return cmp(a, b) < 0 ?
(cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a))
: (cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c));
}
void qqsort(void* aa, size_t n, size_t es, int (*cmp)(const void*, const void*))
{
char* pa, * pb, * pc, * pd, * pl, * pm, * pn;
int d, r, swaptype, swap_cnt;
char* a = (char*)aa;
loop: SWAPINIT(a, es);
swap_cnt = 0;
if (n < 7) {
for (pm = (char*)a + es; pm < (char*)a + n * es; pm += es)
for (pl = pm; pl > (char*) a && cmp(pl - es, pl) > 0;
pl -= es)
swap(pl, pl - es);
return;
}
pm = (char*)a + (n / 2) * es;
if (n > 7) {
pl = (char*)a;
pn = (char*)a + (n - 1) * es;
if (n > 40) {
d = (n / 8) * es;
pl = med3(pl, pl + d, pl + 2 * d, cmp);
pm = med3(pm - d, pm, pm + d, cmp);
pn = med3(pn - 2 * d, pn - d, pn, cmp);
}
pm = med3(pl, pm, pn, cmp);
}
swap(a, pm);
pa = pb = (char*)a + es;
pc = pd = (char*)a + (n - 1) * es;
for (;;) {
while (pb <= pc && (r = cmp(pb, a)) <= 0) {
if (r == 0) {
swap_cnt = 1;
swap(pa, pb);
pa += es;
}
pb += es;
}
while (pb <= pc && (r = cmp(pc, a)) >= 0) {
if (r == 0) {
swap_cnt = 1;
swap(pc, pd);
pd -= es;
}
pc -= es;
}
if (pb > pc)
break;
swap(pb, pc);
swap_cnt = 1;
pb += es;
pc -= es;
}
if (swap_cnt == 0) { /* Switch to insertion sort */
for (pm = (char*)a + es; pm < (char*)a + n * es; pm += es)
for (pl = pm; pl > (char*) a && cmp(pl - es, pl) > 0;
pl -= es)
swap(pl, pl - es);
return;
}
pn = (char*)a + n * es;
r = min(pa - (char*)a, pb - pa);
vecswap(a, pb - r, r);
r = min(pd - pc, pn - pd - (int)es);
vecswap(pb, pn - r, r);
if ((r = pb - pa) > (int)es)
qsort(a, r / es, es, cmp);
if ((r = pd - pc) > (int)es) {
/* Iterate rather than recurse to save stack space */
a = pn - r;
n = r / es;
goto loop;
}
/* qsort(pn - r, r / es, es, cmp);*/
}
std::vector<Cell> shuffled_int(int size, std::mt19937_64& rng) {
std::vector<Cell> v; v.reserve(size);
for (int i = 0; i < size; ++i) v.push_back(i);
std::shuffle(v.begin(), v.end(), rng);
return v;
}
std::vector<Cell> shuffled_16_values_int(int size, std::mt19937_64& rng) {
std::vector<Cell> v; v.reserve(size);
for (int i = 0; i < size; ++i) v.push_back(i % 16);
std::shuffle(v.begin(), v.end(), rng);
return v;
}
std::vector<Cell> all_equal_int(int size, std::mt19937_64&) {
std::vector<Cell> v; v.reserve(size);
for (int i = 0; i < size; ++i) v.push_back(0);
return v;
}
std::vector<Cell> ascending_int(int size, std::mt19937_64&) {
std::vector<Cell> v; v.reserve(size);
for (int i = 0; i < size; ++i) v.push_back(i);
return v;
}
std::vector<Cell> descending_int(int size, std::mt19937_64&) {
std::vector<Cell> v; v.reserve(size);
for (int i = size - 1; i >= 0; --i) v.push_back(i);
return v;
}
std::vector<Cell> pipe_organ_int(int size, std::mt19937_64&) {
std::vector<Cell> v; v.reserve(size);
for (int i = 0; i < size / 2; ++i) v.push_back(i);
for (int i = size / 2; i < size; ++i) v.push_back(size - i);
return v;
}
std::vector<Cell> push_front_int(int size, std::mt19937_64&) {
std::vector<Cell> v; v.reserve(size);
for (int i = 1; i < size; ++i) v.push_back(i);
v.push_back(0);
return v;
}
std::vector<Cell> push_middle_int(int size, std::mt19937_64&) {
std::vector<Cell> v; v.reserve(size);
for (int i = 0; i < size; ++i) {
if (i != size / 2) v.push_back(i);
}
v.push_back(size / 2);
return v;
}
template<class Iter, class Compare>
void heapsort(Iter begin, Iter end, Compare comp) {
std::make_heap(begin, end, comp);
std::sort_heap(begin, end, comp);
}
int comparetor(const void* a, const void* b)
{
return (((Cell*)a)->value - ((Cell*)b)->value);
}
int main() {
auto seed = std::time(0);
std::mt19937_64 el;
typedef std::vector<Cell>(*DistrF)(int, std::mt19937_64&);
typedef void (*SortF)(std::vector<Cell>::iterator, std::vector<Cell>::iterator, std::less<Cell>);
std::pair<std::string, DistrF> distributions[] = {
{"shuffled_int", shuffled_int},
{"shuffled_16_values_int", shuffled_16_values_int},
{"all_equal_int", all_equal_int},
{"ascending_int", ascending_int},
{"descending_int", descending_int},
{"pipe_organ_int", pipe_organ_int},
{"push_front_int", push_front_int},
{"push_middle_int", push_middle_int}
};
std::pair<std::string, SortF> sorts[] = {
{"pdqsort", &pdqsort<std::vector<Cell>::iterator, std::less<Cell>>},
//{"std::sort", &std::sort<std::vector<int>::iterator, std::less<int>>},
//{"std::stable_sort", &std::stable_sort<std::vector<int>::iterator, std::less<int>>},
// {"std::sort_heap", &heapsort<std::vector<int>::iterator, std::less<int>>},
//{"timsort", &gfx::timsort<std::vector<int>::iterator, std::less<int>>}
//{"qsort", &qqsort}
};
int sizes[] = { 1000000, 100 };
for (auto& distribution : distributions) {
for (auto& sort : sorts) {
el.seed(seed);
for (auto size : sizes) {
std::chrono::time_point<std::chrono::high_resolution_clock> total_start, total_end;
std::vector<uint64_t> cycles;
total_start = std::chrono::high_resolution_clock::now();
total_end = std::chrono::high_resolution_clock::now();
while (std::chrono::duration_cast<std::chrono::milliseconds>(total_end - total_start).count() < 5000) {
std::vector<Cell> v = distribution.second(size, el);
uint64_t start = rdtsc();
sort.second(v.begin(), v.end(), std::less<Cell>());
// qqsort(v.data(), v.size(), sizeof(Cell), comparetor);
uint64_t end = rdtsc();
cycles.push_back(uint64_t(double(end - start) / size + 0.5));
total_end = std::chrono::high_resolution_clock::now();
//if (!std::is_sorted(v.begin(), v.end())) {
// std::cerr << "sort failed: ";
// std::cerr << size << " " << distribution.first << " " << sort.first << "\n";
//}
}
std::sort(cycles.begin(), cycles.end());
std::cerr << "pdqsort " << size << " " << distribution.first << " "
<< cycles[cycles.size() / 2] << "\n";
}
for (auto size : sizes) {
std::chrono::time_point<std::chrono::high_resolution_clock> total_start, total_end;
std::vector<uint64_t> cycles;
total_start = std::chrono::high_resolution_clock::now();
total_end = std::chrono::high_resolution_clock::now();
while (std::chrono::duration_cast<std::chrono::milliseconds>(total_end - total_start).count() < 5000) {
std::vector<Cell> v = distribution.second(size, el);
uint64_t start = rdtsc();
//sort.second(v.begin(), v.end(), std::less<Cell>());
qqsort(v.data(), v.size(), sizeof(Cell), comparetor);
uint64_t end = rdtsc();
cycles.push_back(uint64_t(double(end - start) / size + 0.5));
total_end = std::chrono::high_resolution_clock::now();
//if (!std::is_sorted(v.begin(), v.end())) {
// std::cerr << "sort failed: ";
// std::cerr << size << " " << distribution.first << " " << sort.first << "\n";
//}
}
std::sort(cycles.begin(), cycles.end());
std::cerr << "qsort " << size << " " << distribution.first << " "
<< cycles[cycles.size() / 2] << "\n";
}
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment