Created
February 12, 2016 08:37
-
-
Save zeux/148aed5d4bbc8c74a7f4 to your computer and use it in GitHub Desktop.
Quicksort killer sequence generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* This code demonstrates one interesting technique and one defect of Microsoft STL implementation | |
* 1. Using a slightly instrumented version of the quicksort routine, it constructs an array of integers | |
* such that the median is always selected in the worst possible way, which results in quadratic performance. | |
* 2. std::sort uses a variation of introsort with insertion sort for small chunks; this means that in theory | |
* it has guaranteed worst-case O(nlogn) performance, because it switches to heap sort if the recursion limit | |
* is exceeded. However, the debug checks (activated with _DEBUG or _HAS_ITERATOR_DEBUGGING in MSVC 2005+) make | |
* heap sort quadratic. | |
* | |
* Example output: | |
* > cl /D_DEBUG /MTd std_sort_worst_sequence.cpp && std_sort_worst_sequence | |
* > std_sort_worst_sequence.exe | |
* generated sequence with size 10000 | |
* sorted sequence using 49338532 comparisons | |
*/ | |
#include <algorithm> | |
#include <string> | |
#include <vector> | |
#include <map> | |
#include <set> | |
#include <assert.h> | |
#include <stdio.h> | |
namespace std | |
{ | |
template<class _RanIt, class _Pr> void sort_instrumented(_RanIt _First, _RanIt _Last, _Pr _Pred); | |
} | |
struct element | |
{ | |
std::string* data; | |
char last; | |
bool operator<(const element& o) const | |
{ | |
return *data < *o.data; | |
} | |
}; | |
struct sort_context | |
{ | |
virtual bool less(const element& lhs, const element& rhs) { return lhs.last < rhs.last; } | |
virtual void partition_begin() {} | |
virtual void partition_median(const element* med) {} | |
virtual void partition_end(const element* right_begin, const element* right_end) {} | |
}; | |
struct predicate | |
{ | |
sort_context* context; | |
bool operator()(const element& lhs, const element& rhs) const | |
{ | |
return context->less(lhs, rhs); | |
} | |
}; | |
void sort(element* data, size_t count, sort_context* context) | |
{ | |
// make a temporary copy | |
std::vector<element> copy(data, data + count); | |
// sort the original array | |
predicate pred = {context}; | |
std::sort_instrumented(data, data + count, pred); | |
// restore array contents | |
std::copy(copy.begin(), copy.end(), data); | |
} | |
std::pair<std::vector<size_t>, size_t> get_first_median_positions(element* data, size_t count) | |
{ | |
struct median_context: sort_context | |
{ | |
bool inside; | |
unsigned int counter; | |
const element* median; | |
std::vector<const element*> positions; | |
median_context(): inside(false), counter(0), median(0) | |
{ | |
} | |
virtual bool less(const element& lhs, const element& rhs) | |
{ | |
if (inside && counter == 0) | |
{ | |
positions.push_back(&lhs); | |
positions.push_back(&rhs); | |
} | |
return sort_context::less(lhs, rhs); | |
} | |
virtual void partition_begin() | |
{ | |
assert(!inside); | |
inside = true; | |
} | |
virtual void partition_median(const element* med) | |
{ | |
assert(inside); | |
inside = false; | |
if (counter++ == 0) median = med; | |
} | |
}; | |
// collect median data | |
median_context c; | |
sort(data, count, &c); | |
if (!c.median) | |
{ | |
assert(c.positions.size() == 0); | |
return std::make_pair(std::vector<size_t>(), 0); | |
} | |
// sort & remove duplicates | |
std::sort(c.positions.begin(), c.positions.end()); | |
c.positions.erase(std::unique(c.positions.begin(), c.positions.end()), c.positions.end()); | |
// convert from pointers to offsets | |
std::vector<size_t> result(c.positions.size()); | |
for (size_t i = 0; i < result.size(); ++i) result[i] = c.positions[i] - data; | |
// get median position | |
std::vector<const element*>::iterator median = std::find(c.positions.begin(), c.positions.end(), c.median); | |
assert(median != c.positions.end()); | |
return std::make_pair(result, median - c.positions.begin()); | |
} | |
std::pair<size_t, size_t> get_first_partition_right_modify(element* data, size_t count) | |
{ | |
struct partition_context: sort_context | |
{ | |
unsigned int counter; | |
const element* begin; | |
const element* end; | |
partition_context(): counter(0), begin(0), end(0) | |
{ | |
} | |
void partition_end(const element* right_begin, const element* right_end) | |
{ | |
if (counter++ != 0) return; | |
begin = right_begin; | |
end = right_end; | |
} | |
}; | |
// get partitioning data | |
partition_context c; | |
predicate pred = {&c}; | |
std::sort_instrumented(data, data + count, pred); | |
// get indices | |
return (c.begin == 0 && c.end == 0) ? std::make_pair(0, 0) : std::make_pair(c.begin - data, c.end - data); | |
} | |
void update_array(element* data, size_t count) | |
{ | |
// get positions of the first median candidates (along with the median itself) | |
std::pair<std::vector<size_t>, size_t> p = get_first_median_positions(data, count); | |
if (p.first.empty()) return; | |
// fill elements as follows: | |
// - elements from median candidates before median get an 'a' appended | |
// - median element gets a 'b' appended | |
// - all other elements get a 'c' appended (so that they go into the right half after partition) | |
std::map<size_t, char> actions; | |
for (size_t i = 0; i < p.second; ++i) actions[p.first[i]] = 'a'; | |
actions[p.first[p.second]] = 'b'; | |
char action_otherwise = 'c'; | |
for (size_t i = 0; i < count; ++i) | |
{ | |
std::map<size_t, char>::iterator ait = actions.find(i); | |
data[i].last = (ait == actions.end()) ? action_otherwise : ait->second; | |
*data[i].data += data[i].last; | |
} | |
// copy the elements to preserve the original data | |
std::vector<element> copy(data, data + count); | |
// get the right partition (left should be very small so we don't care) | |
std::pair<size_t, size_t> partition = get_first_partition_right_modify(©[0], count); | |
// process the right half | |
update_array(©[0] + partition.first, partition.second - partition.first); | |
} | |
std::vector<size_t> generate_array(size_t count) | |
{ | |
// create element array with empty strings | |
element* data = new element[count]; | |
for (size_t i = 0; i < count; ++i) | |
{ | |
data[i].data = new std::string; | |
data[i].last = 0; | |
} | |
// update it to make worst possible order | |
update_array(data, count); | |
// make a sorted copy using std::multiset because std::sort is slow on this data (we prepared the data this way!) | |
std::multiset<element> copy_set(data, data + count); | |
std::vector<element> copy(copy_set.begin(), copy_set.end()); | |
// create an order remap | |
std::map<std::string*, size_t> order; | |
for (size_t i = 0; i < copy.size(); ++i) order[copy[i].data] = i; | |
// create an integer array with the same order | |
std::vector<size_t> result; | |
for (size_t i = 0; i < count; ++i) result.push_back(order[data[i].data]); | |
// cleanup | |
for (size_t i = 0; i < count; ++i) delete data[i].data; | |
delete[] data; | |
return result; | |
} | |
struct counting_comparator | |
{ | |
size_t* result; | |
counting_comparator(size_t* result): result(result) | |
{ | |
} | |
template <typename T> bool operator()(const T& lhs, const T& rhs) const | |
{ | |
(*result)++; | |
return lhs < rhs; | |
} | |
}; | |
int main() | |
{ | |
const size_t count = 10000; | |
std::vector<size_t> data = generate_array(count); | |
printf("generated sequence with size %u\n", (unsigned)data.size()); | |
size_t result = 0; | |
std::sort(data.begin(), data.end(), counting_comparator(&result)); | |
printf("sorted sequence using %u comparisons\n", (unsigned)result); | |
} | |
// Instrumented std::sort (changes marked with // INSTRUMENTED) | |
namespace std | |
{ | |
template<class _RanIt, | |
class _Pr> inline | |
pair<_RanIt, _RanIt> _Unguarded_partition_instrumented(_RanIt _First, _RanIt _Last, | |
_Pr _Pred) | |
{ // partition [_First, _Last), using _Pred | |
_Pred.context->partition_begin(); // INSTRUMENTED | |
_RanIt _Mid = _First + (_Last - _First) / 2; | |
std::_Median(_First, _Mid, _Last - 1, _Pred); | |
_Pred.context->partition_median(_Mid); // INSTRUMENTED | |
_RanIt _Pfirst = _Mid; | |
_RanIt _Plast = _Pfirst + 1; | |
while (_First < _Pfirst | |
&& !_DEBUG_LT_PRED(_Pred, *(_Pfirst - 1), *_Pfirst) | |
&& !_Pred(*_Pfirst, *(_Pfirst - 1))) | |
--_Pfirst; | |
while (_Plast < _Last | |
&& !_DEBUG_LT_PRED(_Pred, *_Plast, *_Pfirst) | |
&& !_Pred(*_Pfirst, *_Plast)) | |
++_Plast; | |
_RanIt _Gfirst = _Plast; | |
_RanIt _Glast = _Pfirst; | |
for (; ; ) | |
{ // partition | |
for (; _Gfirst < _Last; ++_Gfirst) | |
if (_DEBUG_LT_PRED(_Pred, *_Pfirst, *_Gfirst)) | |
; | |
else if (_Pred(*_Gfirst, *_Pfirst)) | |
break; | |
else | |
std::iter_swap(_Plast++, _Gfirst); | |
for (; _First < _Glast; --_Glast) | |
if (_DEBUG_LT_PRED(_Pred, *(_Glast - 1), *_Pfirst)) | |
; | |
else if (_Pred(*_Pfirst, *(_Glast - 1))) | |
break; | |
else | |
std::iter_swap(--_Pfirst, _Glast - 1); | |
if (_Glast == _First && _Gfirst == _Last) | |
return | |
_Pred.context->partition_end(_Plast, _Last), // INSTRUMENTED | |
(pair<_RanIt, _RanIt>(_Pfirst, _Plast)); | |
if (_Glast == _First) | |
{ // no room at bottom, rotate pivot upward | |
if (_Plast != _Gfirst) | |
std::iter_swap(_Pfirst, _Plast); | |
++_Plast; | |
std::iter_swap(_Pfirst++, _Gfirst++); | |
} | |
else if (_Gfirst == _Last) | |
{ // no room at top, rotate pivot downward | |
if (--_Glast != --_Pfirst) | |
std::iter_swap(_Glast, _Pfirst); | |
std::iter_swap(_Pfirst, --_Plast); | |
} | |
else | |
std::iter_swap(_Gfirst++, --_Glast); | |
} | |
} | |
template<class _RanIt, | |
class _Diff, | |
class _Pr> inline | |
void _Sort_instrumented(_RanIt _First, _RanIt _Last, _Diff _Ideal, _Pr _Pred) | |
{ // order [_First, _Last), using _Pred | |
_Diff _Count; | |
for (; _ISORT_MAX < (_Count = _Last - _First) && 0 < _Ideal; ) | |
{ // divide and conquer by quicksort | |
pair<_RanIt, _RanIt> _Mid = | |
std::_Unguarded_partition_instrumented(_First, _Last, _Pred); | |
_Ideal /= 2, _Ideal += _Ideal / 2; // allow 1.5 log2(N) divisions | |
if (_Mid.first - _First < _Last - _Mid.second) | |
{ // loop on second half | |
std::_Sort(_First, _Mid.first, _Ideal, _Pred); | |
_First = _Mid.second; | |
} | |
else | |
{ // loop on first half | |
std::_Sort(_Mid.second, _Last, _Ideal, _Pred); | |
_Last = _Mid.first; | |
} | |
} | |
if (_ISORT_MAX < _Count) | |
{ // heap sort if too many divisions | |
std::make_heap(_First, _Last, _Pred); | |
std::sort_heap(_First, _Last, _Pred); | |
} | |
else if (1 < _Count) | |
std::_Insertion_sort(_First, _Last, _Pred); // small | |
} | |
template<class _RanIt, | |
class _Pr> inline | |
void sort_instrumented(_RanIt _First, _RanIt _Last, _Pr _Pred) | |
{ // order [_First, _Last), using _Pred | |
_DEBUG_RANGE(_First, _Last); | |
_DEBUG_POINTER(_Pred); | |
std::_Sort_instrumented(_CHECKED_BASE(_First), _CHECKED_BASE(_Last), _Last - _First, _Pred); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment