Last active
August 13, 2021 21:04
-
-
Save sjswitzer/1dc76dc0b4dcf67a7fef to your computer and use it in GitHub Desktop.
Bottom-up natural mergesort in C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Copyright (c) 2015 Stan Switzer. | |
// | |
#ifndef llsort_llsort_h | |
#define llsort_llsort_h | |
#include <vector> | |
#include <functional> | |
template <typename T> | |
struct link | |
{ | |
link(T value, link* next = nullptr) : next(next), value(value) {} | |
link *next; | |
T value; | |
}; | |
template <typename T, class Compare> | |
link<T> *merge(link<T> *list1, link<T> *list2, Compare comp) { | |
link<T> *merged = 0, **mergedend = &merged; | |
while (list1 && list2) { | |
if (comp(list1->value, list2->value)) { | |
*mergedend = list1; | |
list1 = list1->next; | |
} else { | |
*mergedend = list2; | |
list2 = list2->next; | |
} | |
mergedend = &((*mergedend)->next); | |
} | |
if (list1) { | |
*mergedend = list1; | |
} else { | |
*mergedend = list2; | |
} | |
return merged; | |
} | |
template <typename T> | |
link<T> *llsort(link<T> *list) { return llsort(list, std::less_equal<T>()); } | |
template <typename T, class Compare> | |
link<T> *llsort(link<T> *list, Compare comp) { | |
std::vector<link<T>*> stack; | |
while (list) { | |
// Accumulate runs so that sorting mostly-sorted lists is fast. | |
// Complexity of the sort is O(n log m) where m is the number of runs and n is the number of items. | |
// Best case is O(n); worst case is O(n log n). | |
link<T> *run = list, *runend = run; | |
list = list->next; | |
run->next = nullptr; | |
while (list) { | |
if (comp(runend->value, list->value)) { | |
// Greater than or equal than the end of the run: append | |
runend->next = list; | |
runend = list; | |
list = list->next; | |
runend->next = nullptr; | |
} else if (!comp(run->value, list->value)) { | |
// Less than the beginning of the run: prepend | |
// New item must be strictly less than the front item to preserve sort stability. | |
link<T> *tmp = list; | |
list = list->next; | |
tmp->next = run; | |
run = tmp; | |
} else { | |
break; | |
} | |
} | |
// Merge with stack elements using mergesort's sequence of merges. | |
// Consider sorting the list (d a g i b e c f j h) (ignoring runs). | |
// The stack states proceed as follows: | |
// | |
// [ ] | |
// [ (d) ] | |
// [ () (a d) ] | |
// [ (g), (a d) ] | |
// [ () () (a d g i) ] | |
// [ (b) () (a d g i) ] | |
// [ () (b e) (a d g i) ] | |
// [ (c) (b e) (a d g i ) ] | |
// [ () () () (a b c d e f g i) ] | |
// [ (j) () () (a b c d e f g i) ] | |
// [ () (h j) () (a b c d e f g i) ] | |
// | |
// Note that the number of items (runs) at stack[i] is either zero or 2^i and the stack size is bounded | |
// 1+log2(nruns). There's a passing similarity to Timsort here, though Timsort maintains its stack using | |
// something like a Fibonacci sequence where this uses powers of two. | |
int i = 0; | |
for ( ; i < stack.size(); ++i) { | |
if (!stack[i]) | |
break; | |
run = merge(run, stack[i], comp); | |
stack[i] = nullptr; | |
} | |
if (i < stack.size()) { | |
stack[i] = run; | |
} else { | |
stack.push_back(run); | |
} | |
} | |
// Merge all remaining stack elements | |
list = nullptr; | |
for (link<T> *tmp : stack) { | |
list = merge(list, tmp, comp); | |
} | |
return list; | |
} | |
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Copyright (c) 2015 Stan Switzer. | |
// | |
#include <iostream> | |
#include "llsort.h" | |
int main(int argc, const char * argv[]) | |
{ | |
typedef link<std::string> Link; | |
Link *list; | |
// d a g i b e c f j h | |
list = new Link("d"); | |
list = new Link("a", list); | |
list = new Link("g", list); | |
list = new Link("i", list); | |
list = new Link("b", list); | |
list = new Link("e", list); | |
list = new Link("c", list); | |
list = new Link("f", list); | |
list = new Link("j", list); | |
list = new Link("h", list); | |
list = llsort(list); | |
for (Link *link = list; link; link = link->next) { | |
std::cout << link->value << std::endl; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Compare to the Haskell implementation.
The algorithm is described in the other gist.