Skip to content

Instantly share code, notes, and snippets.

@zeux
Created February 12, 2016 08:37
Show Gist options
  • Save zeux/148aed5d4bbc8c74a7f4 to your computer and use it in GitHub Desktop.
Save zeux/148aed5d4bbc8c74a7f4 to your computer and use it in GitHub Desktop.
Quicksort killer sequence generator
/*
* This code demonstrates one interesting technique and one defect of Microsoft STL implementation
* 1. Using a slightly instrumented version of the quicksort routine, it constructs an array of integers
* such that the median is always selected in the worst possible way, which results in quadratic performance.
* 2. std::sort uses a variation of introsort with insertion sort for small chunks; this means that in theory
* it has guaranteed worst-case O(nlogn) performance, because it switches to heap sort if the recursion limit
* is exceeded. However, the debug checks (activated with _DEBUG or _HAS_ITERATOR_DEBUGGING in MSVC 2005+) make
* heap sort quadratic.
*
* Example output:
* > cl /D_DEBUG /MTd std_sort_worst_sequence.cpp && std_sort_worst_sequence
* > std_sort_worst_sequence.exe
* generated sequence with size 10000
* sorted sequence using 49338532 comparisons
*/
#include <algorithm>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <assert.h>
#include <stdio.h>
namespace std
{
template<class _RanIt, class _Pr> void sort_instrumented(_RanIt _First, _RanIt _Last, _Pr _Pred);
}
struct element
{
std::string* data;
char last;
bool operator<(const element& o) const
{
return *data < *o.data;
}
};
struct sort_context
{
virtual bool less(const element& lhs, const element& rhs) { return lhs.last < rhs.last; }
virtual void partition_begin() {}
virtual void partition_median(const element* med) {}
virtual void partition_end(const element* right_begin, const element* right_end) {}
};
struct predicate
{
sort_context* context;
bool operator()(const element& lhs, const element& rhs) const
{
return context->less(lhs, rhs);
}
};
void sort(element* data, size_t count, sort_context* context)
{
// make a temporary copy
std::vector<element> copy(data, data + count);
// sort the original array
predicate pred = {context};
std::sort_instrumented(data, data + count, pred);
// restore array contents
std::copy(copy.begin(), copy.end(), data);
}
std::pair<std::vector<size_t>, size_t> get_first_median_positions(element* data, size_t count)
{
struct median_context: sort_context
{
bool inside;
unsigned int counter;
const element* median;
std::vector<const element*> positions;
median_context(): inside(false), counter(0), median(0)
{
}
virtual bool less(const element& lhs, const element& rhs)
{
if (inside && counter == 0)
{
positions.push_back(&lhs);
positions.push_back(&rhs);
}
return sort_context::less(lhs, rhs);
}
virtual void partition_begin()
{
assert(!inside);
inside = true;
}
virtual void partition_median(const element* med)
{
assert(inside);
inside = false;
if (counter++ == 0) median = med;
}
};
// collect median data
median_context c;
sort(data, count, &c);
if (!c.median)
{
assert(c.positions.size() == 0);
return std::make_pair(std::vector<size_t>(), 0);
}
// sort & remove duplicates
std::sort(c.positions.begin(), c.positions.end());
c.positions.erase(std::unique(c.positions.begin(), c.positions.end()), c.positions.end());
// convert from pointers to offsets
std::vector<size_t> result(c.positions.size());
for (size_t i = 0; i < result.size(); ++i) result[i] = c.positions[i] - data;
// get median position
std::vector<const element*>::iterator median = std::find(c.positions.begin(), c.positions.end(), c.median);
assert(median != c.positions.end());
return std::make_pair(result, median - c.positions.begin());
}
std::pair<size_t, size_t> get_first_partition_right_modify(element* data, size_t count)
{
struct partition_context: sort_context
{
unsigned int counter;
const element* begin;
const element* end;
partition_context(): counter(0), begin(0), end(0)
{
}
void partition_end(const element* right_begin, const element* right_end)
{
if (counter++ != 0) return;
begin = right_begin;
end = right_end;
}
};
// get partitioning data
partition_context c;
predicate pred = {&c};
std::sort_instrumented(data, data + count, pred);
// get indices
return (c.begin == 0 && c.end == 0) ? std::make_pair(0, 0) : std::make_pair(c.begin - data, c.end - data);
}
void update_array(element* data, size_t count)
{
// get positions of the first median candidates (along with the median itself)
std::pair<std::vector<size_t>, size_t> p = get_first_median_positions(data, count);
if (p.first.empty()) return;
// fill elements as follows:
// - elements from median candidates before median get an 'a' appended
// - median element gets a 'b' appended
// - all other elements get a 'c' appended (so that they go into the right half after partition)
std::map<size_t, char> actions;
for (size_t i = 0; i < p.second; ++i) actions[p.first[i]] = 'a';
actions[p.first[p.second]] = 'b';
char action_otherwise = 'c';
for (size_t i = 0; i < count; ++i)
{
std::map<size_t, char>::iterator ait = actions.find(i);
data[i].last = (ait == actions.end()) ? action_otherwise : ait->second;
*data[i].data += data[i].last;
}
// copy the elements to preserve the original data
std::vector<element> copy(data, data + count);
// get the right partition (left should be very small so we don't care)
std::pair<size_t, size_t> partition = get_first_partition_right_modify(&copy[0], count);
// process the right half
update_array(&copy[0] + partition.first, partition.second - partition.first);
}
std::vector<size_t> generate_array(size_t count)
{
// create element array with empty strings
element* data = new element[count];
for (size_t i = 0; i < count; ++i)
{
data[i].data = new std::string;
data[i].last = 0;
}
// update it to make worst possible order
update_array(data, count);
// make a sorted copy using std::multiset because std::sort is slow on this data (we prepared the data this way!)
std::multiset<element> copy_set(data, data + count);
std::vector<element> copy(copy_set.begin(), copy_set.end());
// create an order remap
std::map<std::string*, size_t> order;
for (size_t i = 0; i < copy.size(); ++i) order[copy[i].data] = i;
// create an integer array with the same order
std::vector<size_t> result;
for (size_t i = 0; i < count; ++i) result.push_back(order[data[i].data]);
// cleanup
for (size_t i = 0; i < count; ++i) delete data[i].data;
delete[] data;
return result;
}
struct counting_comparator
{
size_t* result;
counting_comparator(size_t* result): result(result)
{
}
template <typename T> bool operator()(const T& lhs, const T& rhs) const
{
(*result)++;
return lhs < rhs;
}
};
int main()
{
const size_t count = 10000;
std::vector<size_t> data = generate_array(count);
printf("generated sequence with size %u\n", (unsigned)data.size());
size_t result = 0;
std::sort(data.begin(), data.end(), counting_comparator(&result));
printf("sorted sequence using %u comparisons\n", (unsigned)result);
}
// Instrumented std::sort (changes marked with // INSTRUMENTED)
namespace std
{
template<class _RanIt,
class _Pr> inline
pair<_RanIt, _RanIt> _Unguarded_partition_instrumented(_RanIt _First, _RanIt _Last,
_Pr _Pred)
{ // partition [_First, _Last), using _Pred
_Pred.context->partition_begin(); // INSTRUMENTED
_RanIt _Mid = _First + (_Last - _First) / 2;
std::_Median(_First, _Mid, _Last - 1, _Pred);
_Pred.context->partition_median(_Mid); // INSTRUMENTED
_RanIt _Pfirst = _Mid;
_RanIt _Plast = _Pfirst + 1;
while (_First < _Pfirst
&& !_DEBUG_LT_PRED(_Pred, *(_Pfirst - 1), *_Pfirst)
&& !_Pred(*_Pfirst, *(_Pfirst - 1)))
--_Pfirst;
while (_Plast < _Last
&& !_DEBUG_LT_PRED(_Pred, *_Plast, *_Pfirst)
&& !_Pred(*_Pfirst, *_Plast))
++_Plast;
_RanIt _Gfirst = _Plast;
_RanIt _Glast = _Pfirst;
for (; ; )
{ // partition
for (; _Gfirst < _Last; ++_Gfirst)
if (_DEBUG_LT_PRED(_Pred, *_Pfirst, *_Gfirst))
;
else if (_Pred(*_Gfirst, *_Pfirst))
break;
else
std::iter_swap(_Plast++, _Gfirst);
for (; _First < _Glast; --_Glast)
if (_DEBUG_LT_PRED(_Pred, *(_Glast - 1), *_Pfirst))
;
else if (_Pred(*_Pfirst, *(_Glast - 1)))
break;
else
std::iter_swap(--_Pfirst, _Glast - 1);
if (_Glast == _First && _Gfirst == _Last)
return
_Pred.context->partition_end(_Plast, _Last), // INSTRUMENTED
(pair<_RanIt, _RanIt>(_Pfirst, _Plast));
if (_Glast == _First)
{ // no room at bottom, rotate pivot upward
if (_Plast != _Gfirst)
std::iter_swap(_Pfirst, _Plast);
++_Plast;
std::iter_swap(_Pfirst++, _Gfirst++);
}
else if (_Gfirst == _Last)
{ // no room at top, rotate pivot downward
if (--_Glast != --_Pfirst)
std::iter_swap(_Glast, _Pfirst);
std::iter_swap(_Pfirst, --_Plast);
}
else
std::iter_swap(_Gfirst++, --_Glast);
}
}
template<class _RanIt,
class _Diff,
class _Pr> inline
void _Sort_instrumented(_RanIt _First, _RanIt _Last, _Diff _Ideal, _Pr _Pred)
{ // order [_First, _Last), using _Pred
_Diff _Count;
for (; _ISORT_MAX < (_Count = _Last - _First) && 0 < _Ideal; )
{ // divide and conquer by quicksort
pair<_RanIt, _RanIt> _Mid =
std::_Unguarded_partition_instrumented(_First, _Last, _Pred);
_Ideal /= 2, _Ideal += _Ideal / 2; // allow 1.5 log2(N) divisions
if (_Mid.first - _First < _Last - _Mid.second)
{ // loop on second half
std::_Sort(_First, _Mid.first, _Ideal, _Pred);
_First = _Mid.second;
}
else
{ // loop on first half
std::_Sort(_Mid.second, _Last, _Ideal, _Pred);
_Last = _Mid.first;
}
}
if (_ISORT_MAX < _Count)
{ // heap sort if too many divisions
std::make_heap(_First, _Last, _Pred);
std::sort_heap(_First, _Last, _Pred);
}
else if (1 < _Count)
std::_Insertion_sort(_First, _Last, _Pred); // small
}
template<class _RanIt,
class _Pr> inline
void sort_instrumented(_RanIt _First, _RanIt _Last, _Pr _Pred)
{ // order [_First, _Last), using _Pred
_DEBUG_RANGE(_First, _Last);
_DEBUG_POINTER(_Pred);
std::_Sort_instrumented(_CHECKED_BASE(_First), _CHECKED_BASE(_Last), _Last - _First, _Pred);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment