Skip to content

Instantly share code, notes, and snippets.

@dhruvbird
Created April 12, 2012 17:34
Show Gist options
  • Save dhruvbird/2369438 to your computer and use it in GitHub Desktop.
Save dhruvbird/2369438 to your computer and use it in GitHub Desktop.
Optimized Bottom-up Merge Sort beats std::sort()
/*
* Output on my machine (i686 32-bit):
*
* qsort: 3.08 sec
* Bottom up Merge Sort: 1.98 sec
* Top down Merge Sort: 2.43 sec
* std::sort: 2.22 sec
*
*/
#include <vector>
#include <iostream>
#include <algorithm>
#include <stack>
#include <stdio.h>
#include <time.h>
using namespace std;
int log2(int n) {
int lg2 = 0;
while (n > 1) {
n /= 2;
++lg2;
}
return lg2;
}
// Assume that f1..l1 & f2..l2 are contiguous buffer laid out one
// after another.
template <typename T>
void
merge_fast(T *f1, T *l1, T *f2, T *l2, T *buff) {
// copy [f1..l1) to buff
std::copy(f1, l1, buff);
T *bend = buff + (l1 - f1);
while (f2 != l2 && buff != bend) {
if (*buff < *f2) {
*f1++ = *buff++;
} else {
*f1++ = *f2++;
}
}
while (f2 != l2) {
*f1++ = *f2++;
}
while (buff != bend) {
*f1++ = *buff++;
}
}
template <typename T>
void
merge_sort_bottom_up(T *a, int i, int j) {
int sz = j - i;
if (sz < 2) {
return;
}
int r = 2;
int len_max = 2;
vector<T> tmp(sz);
T *buff = &(tmp.front());
while (r < j) {
int mpow2 = (r & -r);
len_max = mpow2 > len_max ? mpow2 : len_max;
int t = 2;
while (t <= mpow2) {
int csz = t / 2;
T *c1 = a + r - t;
T *c2 = a + r - csz;
merge_fast(c1, c1 + csz, c2, c2 + csz, buff);
t *= 2;
}
r += 2;
}
if ((sz & -sz) != sz) {
// input is not a power of 2. perform a final merge pass
r = 1 << (log2(r) + 1);
len_max *= 2;
int t = 2;
while (t <= len_max) {
int csz = t / 2;
T *c1 = a + r - t;
T *c2 = a + r - csz;
if (c2 < a + j) {
int sz2 = j - (r - csz);
merge_fast(c1, c1 + csz, c2, c2 + sz2, buff);
}
t *= 2;
}
}
}
template <typename T>
void
merge_sort_top_down(T *a, T *buff, int i, int j) {
int sz = j - i;
if (sz < 2) {
return;
}
else {
int half = i + (j - i) / 2;
merge_sort_top_down(a, buff, i, half);
merge_sort_top_down(a, buff, half, j);
std::merge(a+i, a+half, a+half, a+j, buff);
std::copy(buff, buff + sz, a+i);
}
}
int
int_cmp(const void *lhs, const void *rhs) {
return (int)lhs - (int)rhs;
}
int
main() {
vector<int> v(30000000);
for (int i = 0; i < v.size(); ++i) {
v[i] = i*3282 + 8929;
}
vector<int> buff(v.size());
time_t s = clock();
qsort(&v.front(), v.size(), sizeof(int), int_cmp);
time_t e = clock();
printf("qsort: %.2f sec\n", (double)(e-s)/CLOCKS_PER_SEC);
for (int i = 0; i < v.size(); ++i) {
v[i] = i*3282 + 8929;
}
s = e;
merge_sort_bottom_up(&v.front(), 0, v.size());
e = clock();
printf("Bottom up Merge Sort: %.2f sec\n", (double)(e-s)/CLOCKS_PER_SEC);
for (int i = 0; i < v.size(); ++i) {
v[i] = i*3282 + 8929;
}
s = e;
merge_sort_top_down(&v.front(), &buff.front(), 0, v.size());
e = clock();
printf("Top down Merge Sort: %.2f sec\n", (double)(e-s)/CLOCKS_PER_SEC);
for (int i = 0; i < v.size(); ++i) {
v[i] = i*3282 + 8929;
}
s = e;
std::sort(v.begin(), v.end());
e = clock();
printf("std::sort: %.2f sec\n", (double)(e-s)/CLOCKS_PER_SEC);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment