Skip to content

Instantly share code, notes, and snippets.

@sjswitzer
Last active August 13, 2021 21:04
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sjswitzer/1dc76dc0b4dcf67a7fef to your computer and use it in GitHub Desktop.
Save sjswitzer/1dc76dc0b4dcf67a7fef to your computer and use it in GitHub Desktop.
Bottom-up natural mergesort in C++
//
// Copyright (c) 2015 Stan Switzer.
//
#ifndef llsort_llsort_h
#define llsort_llsort_h
#include <vector>
#include <functional>
template <typename T>
struct link
{
link(T value, link* next = nullptr) : next(next), value(value) {}
link *next;
T value;
};
template <typename T, class Compare>
link<T> *merge(link<T> *list1, link<T> *list2, Compare comp) {
link<T> *merged = 0, **mergedend = &merged;
while (list1 && list2) {
if (comp(list1->value, list2->value)) {
*mergedend = list1;
list1 = list1->next;
} else {
*mergedend = list2;
list2 = list2->next;
}
mergedend = &((*mergedend)->next);
}
if (list1) {
*mergedend = list1;
} else {
*mergedend = list2;
}
return merged;
}
template <typename T>
link<T> *llsort(link<T> *list) { return llsort(list, std::less_equal<T>()); }
template <typename T, class Compare>
link<T> *llsort(link<T> *list, Compare comp) {
std::vector<link<T>*> stack;
while (list) {
// Accumulate runs so that sorting mostly-sorted lists is fast.
// Complexity of the sort is O(n log m) where m is the number of runs and n is the number of items.
// Best case is O(n); worst case is O(n log n).
link<T> *run = list, *runend = run;
list = list->next;
run->next = nullptr;
while (list) {
if (comp(runend->value, list->value)) {
// Greater than or equal than the end of the run: append
runend->next = list;
runend = list;
list = list->next;
runend->next = nullptr;
} else if (!comp(run->value, list->value)) {
// Less than the beginning of the run: prepend
// New item must be strictly less than the front item to preserve sort stability.
link<T> *tmp = list;
list = list->next;
tmp->next = run;
run = tmp;
} else {
break;
}
}
// Merge with stack elements using mergesort's sequence of merges.
// Consider sorting the list (d a g i b e c f j h) (ignoring runs).
// The stack states proceed as follows:
//
// [ ]
// [ (d) ]
// [ () (a d) ]
// [ (g), (a d) ]
// [ () () (a d g i) ]
// [ (b) () (a d g i) ]
// [ () (b e) (a d g i) ]
// [ (c) (b e) (a d g i ) ]
// [ () () () (a b c d e f g i) ]
// [ (j) () () (a b c d e f g i) ]
// [ () (h j) () (a b c d e f g i) ]
//
// Note that the number of items (runs) at stack[i] is either zero or 2^i and the stack size is bounded
// 1+log2(nruns). There's a passing similarity to Timsort here, though Timsort maintains its stack using
// something like a Fibonacci sequence where this uses powers of two.
int i = 0;
for ( ; i < stack.size(); ++i) {
if (!stack[i])
break;
run = merge(run, stack[i], comp);
stack[i] = nullptr;
}
if (i < stack.size()) {
stack[i] = run;
} else {
stack.push_back(run);
}
}
// Merge all remaining stack elements
list = nullptr;
for (link<T> *tmp : stack) {
list = merge(list, tmp, comp);
}
return list;
}
#endif
//
// Copyright (c) 2015 Stan Switzer.
//
#include <iostream>
#include "llsort.h"
int main(int argc, const char * argv[])
{
typedef link<std::string> Link;
Link *list;
// d a g i b e c f j h
list = new Link("d");
list = new Link("a", list);
list = new Link("g", list);
list = new Link("i", list);
list = new Link("b", list);
list = new Link("e", list);
list = new Link("c", list);
list = new Link("f", list);
list = new Link("j", list);
list = new Link("h", list);
list = llsort(list);
for (Link *link = list; link; link = link->next) {
std::cout << link->value << std::endl;
}
return 0;
}
@sjswitzer
Copy link
Author

Compare to the Haskell implementation.

The algorithm is described in the other gist.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment