Skip to content

Instantly share code, notes, and snippets.

@eskil
Created May 13, 2016 21:05
Show Gist options
  • Save eskil/fcd6890403d3b9b707ecd15be17c050f to your computer and use it in GitHub Desktop.
Save eskil/fcd6890403d3b9b707ecd15be17c050f to your computer and use it in GitHub Desktop.
/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
//
// (C) Eskil Heyn Olsen 2009
//
#include <iostream>
#include <vector>
#include <utility>
#include <iterator>
#include <algorithm>
//#define DEBUG
namespace {
template<typename Iter, typename PredT>
struct find_upper_edge_split {
private:
typedef std::pair<size_t, size_t> range_t;
public:
find_upper_edge_split (std::vector<range_t> &ranges, Iter from, Iter to, ptrdiff_t &good, ptrdiff_t &bad, PredT pred)
: m_ranges (ranges), m_from (from), m_to (to), m_good (good), m_bad (bad), m_pred (pred)
{ }
// Not const, since otherwise m_pred would have to be const
void operator() (range_t range) {
ptrdiff_t count = range.second - range.first;
ptrdiff_t step = count/2;
ptrdiff_t pos = range.first + step;
Iter it = m_from;
std::advance (it, pos);
// Optimisation - if we've encountered an earlier bad entry,
// there's no reason to check later ranges. This happens when an inaccessible entry
// causes a split, and when checking the two new ranges, the first one has a bad entry.
// No reason to check the second.
if (m_bad < pos) {
return;
}
#ifdef DEBUG
std::cout << "\tchecking " << pos << "\n";
#endif /* DEBUG */
int result = m_pred (*it);
if (result == 0) {
m_good = std::max (m_good, std::distance (m_from, it));
range.first = pos;
#ifdef DEBUG
std::cout << "\tchecking " << pos << " is good, new range = (" << range.first << "," << range.second << ")\n";
#endif /* DEBUG */
m_ranges.push_back (range);
} else if (result > 0) {
m_bad = std::min (m_bad, std::distance (m_from, it));
range.second = pos;
#ifdef DEBUG
std::cout << "\tchecking " << pos << " is bad, new range = (" << range.first << "," << range.second << ")\n";
#endif /* DEBUG */
m_ranges.push_back (range);
} else {
range_t a (range.first, range.first + step );
range_t b (range.first + step, range.second);
#ifdef DEBUG
std::cout << "\tchecking " << pos << " is inaccessible, new ranges = (" << a.first << "," << a.second << ") and (" << b.first << "," << b.second << ")\n";
#endif /* DEBUG */
// Optimisation, skip inserting ranges of size 1
if (a.second - a.first > 1) { m_ranges.push_back (a); }
if (b.second - b.first > 1) { m_ranges.push_back (b); }
}
}
private:
std::vector<range_t> &m_ranges;
Iter m_from, m_to;
ptrdiff_t &m_good, &m_bad;
PredT m_pred;
};
template<typename T>
void print_range (T t) {
std::cout << "(" << t.first << "," << t.second << ")";
}
template<typename T>
bool is_distance_1 (T range) {
return (range.second - range.first) == 1;
}
}
//
// find_upper_edge
//
// PredT is a predicate, returns int, three states, 0 = good >0 = bad <0 = inaccessible
//
// Algorithm is roughly a binary search but doing divide and conquer
// when an element cannot be checked (inaccesible). E.g. the case of
// applying binary search to source repository to find a regression
// can be thrown off course by hitting unbuildable/untestable
// revisions - aka the predicate cannot be tested. Likewise, linear
// search is undesirable since build/test time may be huge.
//
// 1: Given a range of size m (m > 1) (1...m),
// 2: Set State to contain range (1...m)
//
// 3: for each range R (k...l) in state
// 4: check pos p = m/2, if good, remember store p in A, replace R with (p...l) (bin search right)
// 5: if bad, remember store p in B, replace R with (k...p) (bin search left)
// 6: if not accessible, replace R with two ranges (k...p) and (p...l) (divide & conquer)
// 7: remove all ranges (e...f) for which f-e == 1 (optional optimisation is to not insert them in step 6)
// 8: if state is nonempty, repeat from 3.
//
// 9: Result is range (A...B)
//
// Open questions:
//
// Q: On step 4 and 5, should A = std::max (A, p) and B = std::min (B, p) ?
//
// Q: Can we optimise in step 5 by removing ranges in states to the
// right of p, once we now know that everything to the right of p
// is not the edge ?
//
// Q: ditto in 4, but that opens the question ; are we finding the
// last or first edge ? For the build issue, we care about the
// last. In which case, setting A and B gets tricky...
//
// Q: In step 3, would it be beneficial (on the average case) to go
// through ranges in a different order than linear ?
//
//
template<typename Iter, typename PredT>
std::pair<Iter, Iter> find_upper_edge (Iter from, Iter to, PredT pred) {
size_t iterations = 0;
size_t compares = 0;
typedef std::pair<size_t, size_t> range_t;
std::vector<range_t> ranges;
ptrdiff_t good = 0;
ptrdiff_t bad = std::distance (from, to);
ranges.push_back (std::make_pair (good, bad));
#ifdef DEBUG
std::cout << "Start: ";
std::for_each (ranges.begin (), ranges.end (), print_range<range_t>);
std::cout << "\n";
#endif /* DEBUG */
while (!ranges.empty ()) {
compares += ranges.size ();
// Sweep
std::vector<range_t> new_ranges;
new_ranges.reserve (ranges.size ());
std::for_each (ranges.begin (), ranges.end (),
find_upper_edge_split<Iter, PredT> (new_ranges, from, to, good, bad, pred));
ranges = new_ranges;
++iterations;
#ifdef DEBUG
std::cout << "\tNew Ranges: ";
std::for_each (ranges.begin (), ranges.end (), print_range<range_t>);
std::cout << "\n";
#endif /* DEBUG */
ranges.erase (std::remove_if (ranges.begin (), ranges.end (), is_distance_1<range_t>), ranges.end ());
#ifdef DEBUG
std::cout << "\tTrimmed Ranges: ";
std::for_each (ranges.begin (), ranges.end (), print_range<range_t>);
std::cout << "\n\tGood = " << good << " bad = " << bad << " iterations = " << iterations << "\n";
std::cout << "\n";
#endif /* DEBUG */
}
//#ifdef DEBUG
std::cout << "Done, searched " << std::distance (from, to) << " elements "
<< " in " << iterations << " iterations "
<< "with " << compares << " compares\n";
//#endif /* DEBUG */
return std::make_pair (from + good, from + bad);
}
int predicate (int v) {
if (v == 0) {
return 1;
} else if (v == 1) {
return 0;
}
return -1;
}
void test_find_upper_edge (int *range, size_t a, size_t b) {
typedef std::pair<size_t, size_t> range_t;
range_t expect (a, b);
size_t sz = 0;
for (size_t i = 0; range[i] != 9; ++i) { sz = i; }
std::pair<int*, int*> result = find_upper_edge (&range[0], &range[sz], predicate);
range_t got (std::distance (&range[0], result.first), std::distance (&range[0], result.second));
if (got == expect) {
std::cout << "*** Got (" << got.first << "," << got.second << "), ok\n";
} else {
std::cout << "*** Got (" << got.first << "," << got.second << ") "
<< "expected (" << expect.first << "," << expect.second << ")\n";
assert (got == expect);
}
}
int main (int argc, char *argv[]) {
// 0 is bad
// 1 is good
// 2 is inaccessible
// 9 is end
int a[] = { 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (a, 3, 8);
int b0[] = { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (b0, 6, 7);
int b1[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (b1, 7, 8);
int b2[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (b2, 8, 9);
int b3[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (b3, 9, 10);
int c[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (c, 7, 8);
int d0[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (d0, 14, 15);
int d1[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (d1, 15, 16);
int d2[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (d2, 16, 17);
int e[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (e, 14, 20);
int f[] = { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (f, 6, 14);
// 0 5 10 15 20 25 30 32
int g[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (g, 20, 21);
// 0 5 10 15 20 25 30 32
int h[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (h, 14, 23);
// 0 5 10 15 20 25 30 32
int i[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (i, 24, 26);
// 0 5 10 15 20 25 30 32
int i2[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 9 };
test_find_upper_edge (i2, 14, 32);
// 0 5 10 15 20 25 30 32
int i3[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 9 };
test_find_upper_edge (i3, 14, 28);
// 0 5 10 15 20 25 30 32
int i4[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 9 };
test_find_upper_edge (i4, 14, 21);
// 0 5 10 15 20 25 30 32
int i5[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 9 };
test_find_upper_edge (i5, 21, 28);
// 0 5 10 15 20 25 30 32
int j[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (j, 14, 15);
// This is an invalid case, since the range is already unordered
// It however happens to work if at least the last element is bad,
// in which case it'll find the last edge
// 0 5 10 15 20 25 30 32
int k0[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (k0, 27, 28);
int k1[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 9 };
test_find_upper_edge (k1, 31, 32);
// Last element is good, fail
//int k2[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9 };
//test_find_upper_edge (k2, 21, 22);
// Last element it inaccessible, fail
//int k3[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 9 };
//test_find_upper_edge (k3, 21, 22);
// 0 5 10 15 20 25 30 32
int l[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 9 };
test_find_upper_edge (l, 0, 32);
// 0 5 10 15 20 25 30 32
int m[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 9 };
test_find_upper_edge (m, 10, 32);
// 0 5 10 15 20 25 30 32
int n[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 9 };
test_find_upper_edge (n, 0, 24);
// 0 5 10 15 20 25 30 32
int o[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 9 };
test_find_upper_edge (o, 9, 24);
// Corner case to test short circuit when early bad build is found, but inaccessible builds left behind some splits
// 0 5 10 15 20 25 30 32
int p[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 9 };
test_find_upper_edge (p, 11, 12);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment