-
-
Save HackerFoo/a5b4923429610b00c49e0946cd71550d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp | |
index a0192a57c..30b986415 100644 | |
--- a/libs/libvtrcapnproto/connection_map.capnp | |
+++ b/libs/libvtrcapnproto/connection_map.capnp | |
@@ -13,8 +13,13 @@ struct VprVector2D { | |
y @1 :Int64; | |
} | |
+struct VprFloatEntry { | |
+ value @0 :Float32; | |
+} | |
+ | |
struct VprCostMap { | |
costMap @0 :Matrix.Matrix((Matrix.Matrix(VprCostEntry))); | |
offset @1 :Matrix.Matrix(VprVector2D); | |
segmentMap @2 :List(Int64); | |
+ penalty @3 :Matrix.Matrix(VprFloatEntry); | |
} | |
diff --git a/libs/libvtrutil/src/vtr_geometry.h b/libs/libvtrutil/src/vtr_geometry.h | |
index 38563f518..0545581b3 100644 | |
--- a/libs/libvtrutil/src/vtr_geometry.h | |
+++ b/libs/libvtrutil/src/vtr_geometry.h | |
@@ -1,10 +1,12 @@ | |
#ifndef VTR_GEOMETRY_H | |
#define VTR_GEOMETRY_H | |
#include "vtr_range.h" | |
+#include "vtr_assert.h" | |
#include <vector> | |
#include <tuple> | |
#include <limits> | |
+#include <type_traits> | |
namespace vtr { | |
@@ -78,7 +80,13 @@ class Rect { | |
Rect(); | |
Rect(T left_val, T bottom_val, T right_val, T top_val); | |
Rect(Point<T> bottom_left_val, Point<T> top_right_val); | |
- Rect(Point<T> bottom_left_val, T size); | |
+ | |
+ //Constructs a rectangle that only contains the given point | |
+ // Rect(p1).contains(p2) => p1 == p2 | |
+ //It is only enabled for integral types, because making this work for floating point types would be difficult and brittle. | |
+ //The following line only enables the constructor if std::is_integral<T>::value == true | |
+ template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type...> | |
+ Rect(Point<U> point); | |
public: //Accessors | |
//Co-ordinates | |
@@ -103,16 +111,13 @@ class Rect { | |
bool coincident(Point<T> point) const; | |
//Returns true if no points are contained in the rectangle | |
+ // rect.empty() => not exists p. rect.contains(p) | |
+ // This also implies either the width or height is 0. | |
bool empty() const; | |
friend bool operator== <>(const Rect<T>& lhs, const Rect<T>& rhs); | |
friend bool operator!= <>(const Rect<T>& lhs, const Rect<T>& rhs); | |
- template<class U> | |
- friend Rect<U> operator|(const Rect<U>& lhs, const Rect<U>& rhs); | |
- template<class U> | |
- friend Rect<U>& operator|=(Rect<U>& lhs, const Rect<U>& rhs); | |
- | |
public: //Mutators | |
//Co-ordinates | |
void set_xmin(T xmin_val); | |
@@ -120,11 +125,27 @@ class Rect { | |
void set_xmax(T xmax_val); | |
void set_ymax(T ymax_val); | |
+ //Equivalent to `*this = bounding_box(*this, other)` | |
+ Rect<T>& expand_bounding_box(const Rect<T>& other); | |
+ | |
private: | |
Point<T> bottom_left_; | |
Point<T> top_right_; | |
}; | |
+//Return the smallest rectangle containing both given rectangles | |
+//Note that this isn't a union and the resulting rectangle may include points not in either given rectangle | |
+template<class T> | |
+Rect<T> bounding_box(const Rect<T>& lhs, const Rect<T>& rhs); | |
+ | |
+//Sample on a uniformly spaced grid within a rectangle | |
+// sample(vtr::Rect(l, h), 0, 0, M) == l | |
+// sample(vtr::Rect(l, h), M, M, M) == h | |
+//To avoid the edges, use `sample(r, x+1, y+1, N+1) for x, y, in 0..N-1 | |
+//Only defined for integral types | |
+template<typename T, typename std::enable_if<std::is_integral<T>::value>::type...> | |
+Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d); | |
+ | |
//A 2D line | |
template<class T> | |
class Line { | |
diff --git a/libs/libvtrutil/src/vtr_geometry.tpp b/libs/libvtrutil/src/vtr_geometry.tpp | |
index 00a6decc2..5fd065b31 100644 | |
--- a/libs/libvtrutil/src/vtr_geometry.tpp | |
+++ b/libs/libvtrutil/src/vtr_geometry.tpp | |
@@ -80,11 +80,13 @@ Rect<T>::Rect(Point<T> bottom_left_val, Point<T> top_right_val) | |
//pass | |
} | |
+//Only defined for integral types | |
template<class T> | |
-Rect<T>::Rect(Point<T> bottom_left_val, T size) | |
- : bottom_left_(bottom_left_val) | |
- , top_right_(bottom_left_val.x() + size, | |
- bottom_left_val.y() + size) { | |
+template<typename U, typename std::enable_if<std::is_integral<U>::value>::type...> | |
+Rect<T>::Rect(Point<U> point) | |
+ : bottom_left_(point) | |
+ , top_right_(point.x() + 1, | |
+ point.y() + 1) { | |
//pass | |
} | |
@@ -165,6 +167,22 @@ bool operator!=(const Rect<T>& lhs, const Rect<T>& rhs) { | |
return !(lhs == rhs); | |
} | |
+template<class T> | |
+Rect<T> bounding_box(const Rect<T>& lhs, const Rect<T>& rhs) { | |
+ return Rect<T>(std::min(lhs.xmin(), rhs.xmin()), | |
+ std::min(lhs.ymin(), rhs.ymin()), | |
+ std::max(lhs.xmax(), rhs.xmax()), | |
+ std::max(lhs.ymax(), rhs.ymax())); | |
+} | |
+ | |
+//Only defined for integral types | |
+template<typename T, typename std::enable_if<std::is_integral<T>::value>::type...> | |
+Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d) { | |
+ VTR_ASSERT(d > 0); | |
+ return Point<T>((r.xmin() * (d - x) + r.xmax() * x + d / 2) / d, | |
+ (r.ymin() * (d - y) + r.ymax() * y + d / 2) / d); | |
+} | |
+ | |
template<class T> | |
void Rect<T>::set_xmin(T xmin_val) { | |
bottom_left_.set_x(xmin_val); | |
@@ -186,16 +204,9 @@ void Rect<T>::set_ymax(T ymax_val) { | |
} | |
template<class T> | |
-Rect<T> operator|(const Rect<T>& lhs, const Rect<T>& rhs) { | |
- return Rect<T>(std::min(lhs.xmin(), rhs.xmin()), | |
- std::min(lhs.ymin(), rhs.ymin()), | |
- std::max(lhs.xmax(), rhs.xmax()), | |
- std::max(lhs.ymax(), rhs.ymax())); | |
-} | |
- | |
-template<class T> | |
-Rect<T>& operator|=(Rect<T>& lhs, const Rect<T>& rhs) { | |
- return lhs = lhs | rhs; | |
+Rect<T>& Rect<T>::expand_bounding_box(const Rect<T>& other) { | |
+ *this = bounding_box(*this, other); | |
+ return *this; | |
} | |
/* | |
diff --git a/libs/libvtrutil/test/test_geometry.cpp b/libs/libvtrutil/test/test_geometry.cpp | |
index d005a4e26..8d3d91f44 100644 | |
--- a/libs/libvtrutil/test/test_geometry.cpp | |
+++ b/libs/libvtrutil/test/test_geometry.cpp | |
@@ -61,6 +61,7 @@ TEST_CASE("Rect", "[vtr_geometry/Rect]") { | |
REQUIRE(r2.contains({6, 4})); | |
REQUIRE_FALSE(r2.contains({100, 4})); | |
REQUIRE_FALSE(r2.contains(pi_2)); | |
+ REQUIRE(vtr::Rect<int>(pi_1).contains(pi_1)); | |
} | |
SECTION("strictly_contains_int") { | |
@@ -78,7 +79,19 @@ TEST_CASE("Rect", "[vtr_geometry/Rect]") { | |
} | |
SECTION("bounds_int") { | |
- REQUIRE(r1 == (r3 | r4)); | |
+ REQUIRE(r1 == bounding_box(r3, r4)); | |
+ } | |
+ | |
+ SECTION("empty_int") { | |
+ REQUIRE(vtr::Rect<int>().empty()); | |
+ } | |
+ | |
+ SECTION("sample_int") { | |
+ auto r = vtr::Rect<int>(pi_1, pi_2); | |
+ REQUIRE(sample(r, 0, 0, 17) == pi_1); | |
+ REQUIRE(sample(r, 17, 17, 17) == pi_2); | |
+ auto inside = sample(r, 3, 11, 17); | |
+ REQUIRE(r.contains(inside)); | |
} | |
} | |
@@ -92,6 +105,7 @@ TEST_CASE("Rect", "[vtr_geometry/Rect]") { | |
vtr::Rect<float> r4(pf_1, pf_2); | |
vtr::Rect<float> r5(pf_1, pf_3); | |
vtr::Rect<float> r6(pf_3, pf_2); | |
+ // vtr::Rect<float> r7(pf_1); // <-- will fail to compile | |
SECTION("equality_float") { | |
REQUIRE(r3 == r4); | |
@@ -140,7 +154,11 @@ TEST_CASE("Rect", "[vtr_geometry/Rect]") { | |
} | |
SECTION("bounds_float") { | |
- REQUIRE(r3 == (r5 | r6)); | |
+ REQUIRE(r3 == bounding_box(r5, r6)); | |
+ } | |
+ | |
+ SECTION("empty_float") { | |
+ REQUIRE(vtr::Rect<float>().empty()); | |
} | |
} | |
} | |
diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp | |
index 481d83aac..a6341de4f 100644 | |
--- a/vpr/src/route/connection_box_lookahead_map.cpp | |
+++ b/vpr/src/route/connection_box_lookahead_map.cpp | |
@@ -38,101 +38,85 @@ | |
* | |
* See e_representative_entry_method */ | |
#define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST | |
-// #define FILL_LIMIT 30 | |
#define CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS | |
// Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE | |
static constexpr int SAMPLE_GRID_SIZE = 3; | |
+// Don't continue storing a path after hitting a lower-or-same cost entry. | |
+static constexpr bool BREAK_ON_MISS = false; | |
+ | |
// Distance penalties filling are calculated based on available samples, but can be adjusted with this factor. | |
static constexpr float PENALTY_FACTOR = 1.f; | |
+static constexpr float PENALTY_MIN = 1e-12f; | |
+ | |
+static constexpr int MIN_PATH_COUNT = 1000; | |
+ | |
+// quantiles (like percentiles but 0-1) of segment count to use as a selection criteria | |
+// choose locations with higher, but not extreme, counts of each segment type | |
+static constexpr double kSamplingCountLowerQuantile = 0.5; | |
+static constexpr double kSamplingCountUpperQuantile = 0.7; | |
// a sample point for a segment type, contains all segments at the VPR location | |
struct SamplePoint { | |
- uint64_t order; // used to order sample points | |
+ // canonical location | |
vtr::Point<int> location; | |
- std::vector<ssize_t> samples; | |
- SamplePoint() | |
- : location(0, 0) {} | |
-}; | |
-// a grid of sample points | |
-struct SampleGrid { | |
- SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; | |
+ // nodes to expand | |
+ std::vector<ssize_t> nodes; | |
}; | |
-// implements a simple cache of key(K)/value(V) pairs of N entries | |
-template<class K, class V, int N> | |
-class SimpleCache { | |
- public: | |
- SimpleCache() | |
- : pos(0) | |
- , hits(0) | |
- , misses(0) {} | |
- | |
- // O(N) lookup | |
- bool get(K key, V* value) { | |
- for (int i = 0; i < N; i++) { | |
- auto& k = keys[i]; | |
- if (k == key) { | |
- auto& v = values[i]; | |
-#if defined(CONNECTION_BOX_LOOKAHEAD_PUSH_BACK_HITS) | |
- // preserve the found key by pushing it back | |
- int last = (pos + N - 1) % N; | |
- std::swap(k, keys[last]); | |
- std::swap(v, values[last]); | |
-#endif | |
- *value = v; | |
- hits++; | |
- return true; | |
- } | |
- } | |
- misses++; | |
- return false; | |
- } | |
+struct SampleRegion { | |
+ // all nodes in `points' have this segment type | |
+ int segment_type; | |
- // O(1) insertion (overwriting an older entry) | |
- void insert(K key, V val) { | |
- keys[pos] = key; | |
- values[pos] = val; | |
- pos = (pos + 1) % N; | |
- } | |
+ // location on the sample grid | |
+ vtr::Point<int> grid_location; | |
- // ratio of successful lookups | |
- float hit_ratio() { | |
- return hits ? static_cast<float>(hits) / static_cast<float>(hits + misses) : 0.f; | |
- } | |
+ // locations to try | |
+ // The computation will keep expanding each of the points | |
+ // until a number of paths (segment -> connection box) are found. | |
+ std::vector<SamplePoint> points; | |
- // ratio of unsuccessful lookups | |
- float miss_ratio() { | |
- return misses ? static_cast<float>(misses) / static_cast<float>(hits + misses) : 0.f; | |
- } | |
- | |
- private: | |
- std::array<K, N> keys; // keep keys together for faster scanning | |
- std::array<V, N> values; | |
- size_t pos; | |
- uint64_t hits; | |
- uint64_t misses; | |
+ // used to sort the regions to improve caching | |
+ uint64_t order; | |
}; | |
template<typename Entry> | |
-static void run_dijkstra(int start_node_ind, | |
- RoutingCosts* routing_costs); | |
-static void find_inodes_for_segment_types(std::vector<SampleGrid>* inodes_for_segment); | |
+static std::pair<float, int> run_dijkstra(int start_node_ind, | |
+ RoutingCosts* routing_costs); | |
+ | |
+static std::vector<SampleRegion> find_sample_regions(int num_segments); | |
// also known as the L1 norm | |
static int manhattan_distance(const vtr::Point<int>& a, const vtr::Point<int>& b) { | |
return abs(b.x() - a.x()) + abs(b.y() - a.y()); | |
} | |
+template<class T> | |
+constexpr const T& clamp(const T& v, const T& lo, const T& hi) { | |
+ return std::min(std::max(v, lo), hi); | |
+} | |
+ | |
+template<typename T> | |
+static vtr::Point<T> closest_point_in_rect(const vtr::Rect<T>& r, const vtr::Point<T>& p) { | |
+ if (r.empty()) { | |
+ return vtr::Point<T>(0, 0); | |
+ } else { | |
+ return vtr::Point<T>(clamp<T>(p.x(), r.xmin(), r.xmax() - 1), | |
+ clamp<T>(p.y(), r.ymin(), r.ymax() - 1)); | |
+ } | |
+} | |
+ | |
// resize internal data structures | |
void CostMap::set_counts(size_t seg_count, size_t box_count) { | |
cost_map_.clear(); | |
offset_.clear(); | |
+ penalty_.clear(); | |
cost_map_.resize({seg_count, box_count}); | |
offset_.resize({seg_count, box_count}); | |
+ penalty_.resize({seg_count, box_count}); | |
seg_count_ = seg_count; | |
box_count_ = box_count; | |
@@ -153,57 +137,39 @@ int CostMap::node_to_segment(int from_node_ind) const { | |
return segment_map_[from_node_ind]; | |
} | |
+static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { | |
+ penalty = std::max(penalty, PENALTY_MIN); | |
+ return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, | |
+ entry.congestion, entry.fill); | |
+} | |
+ | |
// get a cost entry for a segment type, connection box type, and offset | |
-util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y, int* out_of_bounds_val) const { | |
+util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { | |
VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); | |
const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; | |
if (cost_map.dim_size(0) == 0 || cost_map.dim_size(1) == 0) { | |
return util::Cost_Entry(); | |
} | |
- int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first; | |
- int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second; | |
- | |
- int out_of_bounds_distance = 0; | |
- if (dx < 0) { | |
- out_of_bounds_distance += abs(dx); | |
- dx = 0; | |
- } | |
- if (dy < 0) { | |
- out_of_bounds_distance += abs(dy); | |
- dy = 0; | |
- } | |
- | |
- if (dx >= (ssize_t)cost_map.dim_size(0)) { | |
- out_of_bounds_distance += abs(int(dx) - int(cost_map.dim_size(0) - 1)); | |
- dx = cost_map.dim_size(0) - 1; | |
- } | |
- if (dy >= (ssize_t)cost_map.dim_size(1)) { | |
- out_of_bounds_distance += abs(int(dy) - int(cost_map.dim_size(1) - 1)); | |
- dy = cost_map.dim_size(1) - 1; | |
- } | |
- | |
- if (out_of_bounds_val != nullptr) { | |
- *out_of_bounds_val = out_of_bounds_distance; | |
- } | |
- | |
- return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; | |
-} | |
- | |
-static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { | |
- auto out = util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, | |
- entry.congestion); | |
- | |
- out.fill = entry.fill; | |
- return out; | |
+ vtr::Point<int> coord(delta_x - offset_[from_seg_index][size_t(box_id)].first, | |
+ delta_y - offset_[from_seg_index][size_t(box_id)].second); | |
+ vtr::Rect<int> bounds(0, 0, cost_map.dim_size(0), cost_map.dim_size(1)); | |
+ auto closest = closest_point_in_rect(bounds, coord); | |
+ auto cost = cost_map_[from_seg_index][size_t(box_id)][closest.x()][closest.y()]; | |
+ float penalty = penalty_[from_seg_index][size_t(box_id)]; | |
+ auto distance = manhattan_distance(closest, coord); | |
+ return penalize(cost, distance, penalty); | |
} | |
// set the cost map for a segment type and connection box type, filling holes | |
-void CostMap::set_cost_map(const RoutingCosts& costs) { | |
+void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs) { | |
// calculate the bounding boxes | |
vtr::Matrix<vtr::Rect<int>> bounds({seg_count_, box_count_}); | |
- for (const auto& entry : costs) { | |
- bounds[entry.first.seg_index][size_t(entry.first.box_id)] |= vtr::Rect<int>(entry.first.delta, 1); | |
+ for (const auto& entry : delay_costs) { | |
+ bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect<int>(entry.first.delta)); | |
+ } | |
+ for (const auto& entry : base_costs) { | |
+ bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect<int>(entry.first.delta)); | |
} | |
// store bounds | |
@@ -225,18 +191,27 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { | |
} | |
// store entries into the matrices | |
- for (const auto& entry : costs) { | |
+ for (const auto& entry : delay_costs) { | |
+ int seg = entry.first.seg_index; | |
+ int box = size_t(entry.first.box_id); | |
+ const auto& seg_box_bounds = bounds[seg][box]; | |
+ int x = entry.first.delta.x() - seg_box_bounds.xmin(); | |
+ int y = entry.first.delta.y() - seg_box_bounds.ymin(); | |
+ cost_map_[seg][box][x][y].delay = entry.second; | |
+ } | |
+ for (const auto& entry : base_costs) { | |
int seg = entry.first.seg_index; | |
int box = size_t(entry.first.box_id); | |
const auto& seg_box_bounds = bounds[seg][box]; | |
int x = entry.first.delta.x() - seg_box_bounds.xmin(); | |
int y = entry.first.delta.y() - seg_box_bounds.ymin(); | |
- cost_map_[seg][box][x][y] = entry.second; | |
+ cost_map_[seg][box][x][y].congestion = entry.second; | |
} | |
// fill the holes | |
for (size_t seg = 0; seg < seg_count_; seg++) { | |
for (size_t box = 0; box < box_count_; box++) { | |
+ penalty_[seg][box] = std::numeric_limits<float>::infinity(); | |
const auto& seg_box_bounds = bounds[seg][box]; | |
if (seg_box_bounds.empty()) { | |
continue; | |
@@ -244,7 +219,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { | |
auto& matrix = cost_map_[seg][box]; | |
// calculate delay penalty | |
- float min_delay = 0.f, max_delay = 0.f; | |
+ float min_delay = std::numeric_limits<float>::infinity(), max_delay = 0.f; | |
vtr::Point<int> min_location(0, 0), max_location(0, 0); | |
for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { | |
for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { | |
@@ -262,6 +237,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { | |
} | |
} | |
float delay_penalty = (max_delay - min_delay) / static_cast<float>(std::max(1, manhattan_distance(max_location, min_location))); | |
+ penalty_[seg][box] = delay_penalty; | |
// find missing cost entries and fill them in by copying a nearby cost entry | |
std::vector<std::tuple<unsigned, unsigned, util::Cost_Entry>> missing; | |
@@ -284,11 +260,10 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { | |
} | |
} | |
} | |
-#if !defined(FILL_LIMIT) | |
if (couldnt_fill) { | |
+ // give up trying to fill an empty matrix | |
break; | |
} | |
-#endif | |
} | |
if (!couldnt_fill) { | |
@@ -303,13 +278,11 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { | |
if (couldnt_fill) { | |
VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld, %d x %d bounding box\n", | |
seg, box, seg_box_bounds.width(), seg_box_bounds.height()); | |
-#if !defined(FILL_LIMIT) | |
for (unsigned y = 0; y < matrix.dim_size(1); y++) { | |
for (unsigned x = 0; x < matrix.dim_size(0); x++) { | |
VTR_ASSERT(!matrix[x][y].valid()); | |
} | |
} | |
-#endif | |
} | |
} | |
} | |
@@ -325,11 +298,11 @@ void CostMap::print(int iseg) const { | |
box_id < device_ctx.connection_boxes.num_connection_box_types(); | |
box_id++) { | |
auto& matrix = cost_map_[iseg][box_id]; | |
- VTR_LOG("cost for box_id = %lu (%zu, %zu)\n", box_id, matrix.dim_size(0), matrix.dim_size(1)); | |
if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) { | |
VTR_LOG("cost EMPTY for box_id = %lu\n", box_id); | |
continue; | |
} | |
+ VTR_LOG("cost for box_id = %lu (%zu, %zu)\n", box_id, matrix.dim_size(0), matrix.dim_size(1)); | |
double sum = 0.0; | |
for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { | |
for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { | |
@@ -387,46 +360,40 @@ std::pair<util::Cost_Entry, int> CostMap::get_nearby_cost_entry(const vtr::NdMat | |
int cy, | |
const vtr::Rect<int>& bounds) { | |
// spiral around (cx, cy) looking for a nearby entry | |
- int n = 1; | |
- bool in_bounds; | |
- util::Cost_Entry entry; | |
- entry.fill = true; | |
+ bool in_bounds = bounds.contains(vtr::Point<int>(cx, cy)); | |
+ if (!in_bounds) { | |
+ return std::make_pair(util::Cost_Entry(), 0); | |
+ } | |
+ int n = 0; | |
+ util::Cost_Entry fill(matrix[cx][cy]); | |
+ fill.fill = true; | |
- do { | |
+ while (in_bounds && !fill.valid()) { | |
+ n++; | |
in_bounds = false; | |
+ util::Cost_Entry min_entry; | |
for (int ox = -n; ox <= n; ox++) { | |
int x = cx + ox; | |
int oy = n - abs(ox); | |
int yp = cy + oy; | |
int yn = cy - oy; | |
if (bounds.contains(vtr::Point<int>(x, yp))) { | |
- assign_min_entry(&entry, matrix[x][yp]); | |
+ assign_min_entry(&min_entry, matrix[x][yp]); | |
in_bounds = true; | |
} | |
if (bounds.contains(vtr::Point<int>(x, yn))) { | |
- assign_min_entry(&entry, matrix[x][yn]); | |
+ assign_min_entry(&min_entry, matrix[x][yn]); | |
in_bounds = true; | |
} | |
} | |
- if (entry.valid()) return std::make_pair(entry, n); | |
- n++; | |
-#if defined(FILL_LIMIT) | |
- if (n > FILL_LIMIT) { | |
- break; | |
+ if (!std::isfinite(fill.delay)) { | |
+ fill.delay = min_entry.delay; | |
} | |
-#endif | |
- } while (in_bounds); | |
- return std::make_pair(util::Cost_Entry(), n); | |
-} | |
- | |
-// sample on a uniformly spaced grid within a rectangle | |
-// sample(vtr::Rect(N), 0, 0, M) is vtr::Point(0, 0) | |
-// sample(vtr::Rect(N), M, M, M) is vtr::Point(N, N) | |
-// To avoid the edges, use `sample(r, x+1, y+1, N+1) for x, y, in 0..N-1 | |
-template<typename T> | |
-vtr::Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d) { | |
- return vtr::Point<T>((r.xmin() * (d - x) + r.xmax() * x + d / 2) / d, | |
- (r.ymin() * (d - y) + r.ymax() * y + d / 2) / d); | |
+ if (!std::isfinite(fill.congestion)) { | |
+ fill.congestion = min_entry.congestion; | |
+ } | |
+ } | |
+ return std::make_pair(fill, n); | |
} | |
// derive a cost from the map between two nodes | |
@@ -479,7 +446,7 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, | |
ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); | |
int from_seg_index = cost_map_.node_to_segment(from_node_ind); | |
- util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, box_id, dx, dy, nullptr); | |
+ util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, box_id, dx, dy); | |
if (!cost_entry.valid()) { | |
// there is no route | |
@@ -521,51 +488,41 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, | |
} | |
// add a best cost routing path from start_node_ind to node_ind to routing costs | |
-static void add_paths(int start_node_ind, | |
- int node_ind, | |
+template<typename Entry> | |
+static bool add_paths(int start_node_ind, | |
+ Entry current, | |
std::unordered_map<int, util::Search_Path>* paths, | |
RoutingCosts* routing_costs) { | |
auto& device_ctx = g_vpr_ctx.device(); | |
ConnectionBoxId box_id; | |
std::pair<size_t, size_t> box_location; | |
float site_pin_delay; | |
+ int node_ind = current.rr_node_ind; | |
bool found = device_ctx.connection_boxes.find_connection_box( | |
node_ind, &box_id, &box_location, &site_pin_delay); | |
+ bool new_sample_found = false; | |
if (!found) { | |
VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); | |
} | |
// reconstruct the path | |
std::vector<int> path; | |
- for (int i = node_ind; i != start_node_ind; path.push_back(i = (*paths)[i].parent)) | |
- ; | |
- util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true, /*Tsw_adjust=*/0.f); | |
- | |
- // recalculate the path with congestion | |
- util::PQ_Entry current_full = parent_entry; | |
- int parent = start_node_ind; | |
- for (auto it = path.rbegin(); it != path.rend(); it++) { | |
- auto& parent_node = device_ctx.rr_nodes[parent]; | |
- float Tsw_adjust = 0.f; | |
- | |
- // Remove site pin delay when taking edge from last channel to IPIN. | |
- if (*it == node_ind) { | |
- Tsw_adjust = -site_pin_delay; | |
- } | |
- current_full = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), current_full.delay, | |
- current_full.R_upstream, current_full.congestion_upstream, false, Tsw_adjust); | |
- parent = *it; | |
+ for (int i = (*paths)[node_ind].parent; i != start_node_ind; i = (*paths)[i].parent) { | |
+ path.push_back(i); | |
} | |
+ path.push_back(start_node_ind); | |
+ | |
+ current.adjust_Tsw(-site_pin_delay); | |
// add each node along the path subtracting the incremental costs from the current costs | |
- parent = start_node_ind; | |
+ Entry start_to_here(start_node_ind, UNDEFINED, nullptr); | |
+ int parent = start_node_ind; | |
for (auto it = path.rbegin(); it != path.rend(); it++) { | |
- auto& parent_node = device_ctx.rr_nodes[parent]; | |
- int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index; | |
- const std::pair<size_t, size_t>* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent); | |
+ auto& here = device_ctx.rr_nodes[*it]; | |
+ int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index; | |
+ const std::pair<size_t, size_t>* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(*it); | |
if (from_canonical_loc == nullptr) { | |
- VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", | |
- parent); | |
+ VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", *it); | |
} | |
vtr::Point<int> delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), | |
@@ -574,36 +531,52 @@ static void add_paths(int start_node_ind, | |
seg_index, | |
box_id, | |
delta}; | |
- CompressedRoutingCostKey compressed_key(key); | |
- float new_delay = current_full.delay - parent_entry.delay; | |
- float new_congestion = current_full.congestion_upstream - parent_entry.congestion_upstream; | |
+ if (*it != start_node_ind) { | |
+ auto& parent_node = device_ctx.rr_nodes[parent]; | |
+ start_to_here = Entry(*it, parent_node.edge_switch((*paths)[*it].edge), &start_to_here); | |
+ parent = *it; | |
+ } | |
- VTR_ASSERT(new_delay >= 0.f); | |
- VTR_ASSERT(new_congestion >= 0.f); | |
+ float cost = current.cost() - start_to_here.cost(); | |
+ if (cost < 0.f && cost > -10e-15 /* 10 femtosecond */) { | |
+ cost = 0.f; | |
+ } | |
- auto val = util::Cost_Entry(new_delay, new_congestion); | |
+ VTR_ASSERT(cost >= 0.f); | |
// NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST | |
- auto result = routing_costs->insert(std::make_pair(key, val)); | |
+ auto result = routing_costs->insert(std::make_pair(key, cost)); | |
if (!result.second) { | |
- assign_min_entry(&result.first->second, val); | |
+ if (cost < result.first->second) { | |
+ result.first->second = cost; | |
+ new_sample_found = true; | |
+ } else if (BREAK_ON_MISS) { | |
+ break; | |
+ } | |
+ } else { | |
+ new_sample_found = true; | |
} | |
- | |
- // update parent data | |
- parent_entry = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), parent_entry.delay, | |
- parent_entry.R_upstream, parent_entry.congestion_upstream, false, /*Tsw_adjust=*/0.f); | |
- parent = *it; | |
} | |
+ return new_sample_found; | |
} | |
-/* runs Dijkstra's algorithm from specified node until all nodes have been | |
+/* Runs Dijkstra's algorithm from specified node until all nodes have been | |
* visited. Each time a pin is visited, the delay/congestion information | |
- * to that pin is stored to an entry in the routing_cost_map */ | |
+ * to that pin is stored to an entry in the routing_cost_map. | |
+ * | |
+ * Returns the maximum (last) minimum cost path stored, and | |
+ * the number of paths from start_node_ind stored. */ | |
template<typename Entry> | |
-static void run_dijkstra(int start_node_ind, | |
- RoutingCosts* routing_costs) { | |
+static std::pair<float, int> run_dijkstra(int start_node_ind, | |
+ RoutingCosts* routing_costs) { | |
auto& device_ctx = g_vpr_ctx.device(); | |
+ int path_count = 0; | |
+ const std::pair<size_t, size_t>* start_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind); | |
+ if (start_canonical_loc == nullptr) { | |
+ VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", | |
+ start_node_ind); | |
+ } | |
/* a list of boolean flags (one for each rr node) to figure out if a | |
* certain node has already been expanded */ | |
@@ -617,7 +590,8 @@ static void run_dijkstra(int start_node_ind, | |
std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> pq; | |
/* first entry has no upstream delay or congestion */ | |
- Entry first_entry(start_node_ind, UNDEFINED, 0, true); | |
+ Entry first_entry(start_node_ind, UNDEFINED, nullptr); | |
+ float max_cost = first_entry.cost(); | |
pq.push(first_entry); | |
@@ -635,13 +609,18 @@ static void run_dijkstra(int start_node_ind, | |
/* if this node is an ipin record its congestion/delay in the routing_cost_map */ | |
if (device_ctx.rr_nodes[node_ind].type() == IPIN) { | |
- add_paths(start_node_ind, node_ind, &paths, routing_costs); | |
+ // the last cost should be the highest | |
+ max_cost = current.cost(); | |
+ | |
+ path_count++; | |
+ add_paths<Entry>(start_node_ind, current, &paths, routing_costs); | |
} else { | |
- expand_dijkstra_neighbours(device_ctx.rr_nodes, | |
- current, paths, node_expanded, pq); | |
+ util::expand_dijkstra_neighbours(device_ctx.rr_nodes, | |
+ current, paths, node_expanded, pq); | |
node_expanded[node_ind] = true; | |
} | |
} | |
+ return std::make_pair(max_cost, path_count); | |
} | |
// compute the cost maps for lookahead | |
@@ -652,30 +631,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector<t_segment_inf>& segmen | |
alloc_and_load_rr_node_route_structs(); | |
size_t num_segments = segment_inf.size(); | |
- std::vector<SamplePoint> sample_points; | |
- { | |
- std::vector<SampleGrid> inodes_for_segment(num_segments); | |
- find_inodes_for_segment_types(&inodes_for_segment); | |
- | |
- // collapse into a vector | |
- for (auto& grid : inodes_for_segment) { | |
- for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { | |
- for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { | |
- auto& point = grid.point[y][x]; | |
- if (!point.samples.empty()) { | |
- point.order = point.samples[0]; | |
- sample_points.push_back(point); | |
- } | |
- } | |
- } | |
- } | |
- } | |
- | |
- // sort by VPR coordinate | |
- std::sort(sample_points.begin(), sample_points.end(), | |
- [](const SamplePoint& a, const SamplePoint& b) { | |
- return a.order < b.order; | |
- }); | |
+ std::vector<SampleRegion> sample_regions = find_sample_regions(num_segments); | |
/* free previous delay map and allocate new one */ | |
auto& device_ctx = g_vpr_ctx.device(); | |
@@ -683,42 +639,87 @@ void ConnectionBoxMapLookahead::compute(const std::vector<t_segment_inf>& segmen | |
device_ctx.connection_boxes.num_connection_box_types()); | |
VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); | |
- RoutingCosts all_costs; | |
+ RoutingCosts all_delay_costs; | |
+ RoutingCosts all_base_costs; | |
/* run Dijkstra's algorithm for each segment type & channel type combination */ | |
#if defined(VPR_USE_TBB) | |
tbb::mutex all_costs_mutex; | |
- tbb::parallel_for_each(sample_points, [&](const SamplePoint& point) { | |
+ tbb::parallel_for_each(sample_regions, [&](const SampleRegion& region) { | |
#else | |
- for (const auto& point : sample_points) { | |
+ for (const auto& region : sample_regions) { | |
#endif | |
// holds the cost entries for a run | |
- RoutingCosts costs; | |
+ RoutingCosts delay_costs; | |
+ RoutingCosts base_costs; | |
+ int total_path_count = 0; | |
+ | |
+ for (auto& point : region.points) { | |
+ // statistics | |
+ vtr::Timer run_timer; | |
+ float max_delay_cost = 0.f; | |
+ float max_base_cost = 0.f; | |
+ int path_count = 0; | |
+ for (auto node_ind : point.nodes) { | |
+ { | |
+ auto result = run_dijkstra<util::PQ_Entry_Delay>(node_ind, &delay_costs); | |
+ max_delay_cost = std::max(max_delay_cost, result.first); | |
+ path_count += result.second; | |
+ } | |
+ { | |
+ auto result = run_dijkstra<util::PQ_Entry_Base_Cost>(node_ind, &base_costs); | |
+ max_base_cost = std::max(max_base_cost, result.first); | |
+ path_count += result.second; | |
+ } | |
+ } | |
- for (auto node_ind : point.samples) { | |
- run_dijkstra<util::PQ_Entry_Delay>(node_ind, &costs); | |
- run_dijkstra<util::PQ_Entry_Base_Cost>(node_ind, &costs); | |
+ if (path_count > 0) { | |
+ VTR_LOG_WARN("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n", | |
+ path_count, segment_inf[region.segment_type].name.c_str(), region.segment_type, | |
+ point.location.x(), point.location.y(), | |
+ (int)point.nodes.size(), | |
+ max_delay_cost, max_base_cost, | |
+ path_count / run_timer.elapsed_sec()); | |
+ } | |
+ | |
+ /* | |
+ * if (path_count == 0) { | |
+ * for (auto node_ind : point.nodes) { | |
+ * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); | |
+ * } | |
+ * } | |
+ */ | |
+ | |
+ total_path_count += path_count; | |
+ if (total_path_count > MIN_PATH_COUNT) { | |
+ break; | |
+ } | |
} | |
#if defined(VPR_USE_TBB) | |
all_costs_mutex.lock(); | |
#endif | |
- for (auto node_ind : point.samples) { | |
- VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); | |
+ if (total_path_count == 0) { | |
+ VTR_LOG("No paths found for sample region %s(%d, %d)\n", | |
+ segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y()); | |
} | |
- VTR_LOG("Expanded sample point (%d, %d)\n", | |
- point.location.x(), point.location.y()); | |
- | |
// combine the cost map from this run with the final cost maps for each segment | |
- for (const auto& cost : costs) { | |
- const auto& key = cost.first; | |
+ for (const auto& cost : delay_costs) { | |
const auto& val = cost.second; | |
- auto result = all_costs.insert(std::make_pair(key, val)); | |
+ auto result = all_delay_costs.insert(std::make_pair(cost.first, val)); | |
if (!result.second) { | |
// implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST | |
- assign_min_entry(&result.first->second, val); | |
+ result.first->second = std::min(result.first->second, val); | |
+ } | |
+ } | |
+ for (const auto& cost : base_costs) { | |
+ const auto& val = cost.second; | |
+ auto result = all_base_costs.insert(std::make_pair(cost.first, val)); | |
+ if (!result.second) { | |
+ // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST | |
+ result.first->second = std::min(result.first->second, val); | |
} | |
} | |
@@ -732,7 +733,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector<t_segment_inf>& segmen | |
VTR_LOG("Combining results\n"); | |
/* boil down the cost list in routing_cost_map at each coordinate to a | |
* representative cost entry and store it in the lookahead cost map */ | |
- cost_map_.set_cost_map(all_costs); | |
+ cost_map_.set_cost_map(all_delay_costs, all_base_costs); | |
// diagnostics | |
#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_ENTRIES) | |
@@ -785,91 +786,208 @@ float ConnectionBoxMapLookahead::get_expected_cost( | |
} | |
// the smallest bounding box containing a node | |
-static vtr::Rect<int> bounding_box_for_node(const t_rr_node& node) { | |
- return vtr::Rect<int>(node.xlow(), node.ylow(), | |
- node.xhigh() + 1, node.yhigh() + 1); | |
+static vtr::Rect<int> bounding_box_for_node(const ConnectionBoxes& connection_boxes, int node_ind) { | |
+ const std::pair<size_t, size_t>* loc = connection_boxes.find_canonical_loc(node_ind); | |
+ if (loc == nullptr) { | |
+ return vtr::Rect<int>(); | |
+ } else { | |
+ return vtr::Rect<int>(vtr::Point<int>(loc->first, loc->second)); | |
+ } | |
+} | |
+ | |
+static vtr::Rect<int> sample_window(const vtr::Rect<int>& bounding_box, int sx, int sy, int n) { | |
+ return vtr::Rect<int>(sample(bounding_box, sx, sy, n), | |
+ sample(bounding_box, sx + 1, sy + 1, n)); | |
+} | |
+ | |
+static std::vector<SamplePoint> choose_points(const vtr::Matrix<int>& counts, | |
+ const vtr::Rect<int>& window, | |
+ int min_count, | |
+ int max_count) { | |
+ VTR_ASSERT(min_count <= max_count); | |
+ std::vector<SamplePoint> points; | |
+ for (int y = window.ymin(); y < window.ymax(); y++) { | |
+ for (int x = window.xmin(); x < window.xmax(); x++) { | |
+ if (counts[x][y] >= min_count && counts[x][y] <= max_count) { | |
+ points.push_back(SamplePoint{/* .location = */ vtr::Point<int>(x, y), | |
+ /* .nodes = */ {}}); | |
+ } | |
+ } | |
+ } | |
+ | |
+ vtr::Point<int> center = sample(window, 1, 1, 2); | |
+ | |
+ // sort by distance from center | |
+ std::sort(points.begin(), points.end(), | |
+ [&](const SamplePoint& a, const SamplePoint& b) { | |
+ return manhattan_distance(a.location, center) < manhattan_distance(b.location, center); | |
+ }); | |
+ | |
+ return points; | |
+} | |
+ | |
+// histogram is a map from segment count to number of locations having that count | |
+static int quantile(const std::map<int, int>& histogram, float ratio) { | |
+ if (histogram.empty()) { | |
+ return 0; | |
+ } | |
+ int sum = 0; | |
+ for (const auto& entry : histogram) { | |
+ sum += entry.second; | |
+ } | |
+ int limit = std::ceil(sum * ratio); | |
+ for (const auto& entry : histogram) { | |
+ limit -= entry.second; | |
+ if (limit <= 0) { | |
+ return entry.first; | |
+ } | |
+ } | |
+ return 0; | |
+} | |
+ | |
+// select a good number of segments to find | |
+static std::map<int, int> count_histogram(const vtr::Rect<int>& box, const vtr::Matrix<int>& counts) { | |
+ std::map<int, int> histogram; | |
+ for (int y = box.ymin(); y < box.ymax(); y++) { | |
+ for (int x = box.xmin(); x < box.xmax(); x++) { | |
+ int count = counts[x][y]; | |
+ if (count > 0) { | |
+ ++histogram[count]; | |
+ } | |
+ } | |
+ } | |
+ return histogram; | |
} | |
-// the center point for a node | |
-// it is unknown where the the node starts, so use the average | |
-static vtr::Point<int> point_for_node(const t_rr_node& node) { | |
- int x = (node.xhigh() + node.xlow()) / 2; | |
- int y = (node.yhigh() + node.ylow()) / 2; | |
- return vtr::Point<int>(x, y); | |
+// Used to calculate each region's `order.' | |
+// A space-filling curve will order the regions so that | |
+// nearby points stay close in order. A Hilbert curve might | |
+// be better, but a Morton (Z)-order curve is easy to compute, | |
+// because it's just interleaving binary bits, so this | |
+// function interleaves with 0's so that the X and Y | |
+// dimensions can then be OR'ed together. | |
+static uint64_t interleave(uint32_t x) { | |
+ uint64_t i = x; | |
+ i = (i ^ (i << 16)) & 0x0000ffff0000ffff; | |
+ i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff; | |
+ i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f; | |
+ i = (i ^ (i << 2)) & 0x3333333333333333; | |
+ i = (i ^ (i << 1)) & 0x5555555555555555; | |
+ return i; | |
} | |
// for each segment type, find the nearest nodes to an equally spaced grid of points | |
// within the bounding box for that segment type | |
-static void find_inodes_for_segment_types(std::vector<SampleGrid>* inodes_for_segment) { | |
+static std::vector<SampleRegion> find_sample_regions(int num_segments) { | |
+ vtr::ScopedStartFinishTimer timer("finding sample regions"); | |
+ std::vector<SampleRegion> sample_regions; | |
auto& device_ctx = g_vpr_ctx.device(); | |
auto& rr_nodes = device_ctx.rr_nodes; | |
- const int num_segments = inodes_for_segment->size(); | |
+ std::vector<vtr::Matrix<int>> segment_counts(num_segments); | |
// compute bounding boxes for each segment type | |
std::vector<vtr::Rect<int>> bounding_box_for_segment(num_segments, vtr::Rect<int>()); | |
for (size_t i = 0; i < rr_nodes.size(); i++) { | |
auto& node = rr_nodes[i]; | |
if (node.type() != CHANX && node.type() != CHANY) continue; | |
+ if (node.capacity() == 0 || node.num_edges() == 0) continue; | |
int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; | |
VTR_ASSERT(seg_index != OPEN); | |
VTR_ASSERT(seg_index < num_segments); | |
- bounding_box_for_segment[seg_index] |= bounding_box_for_node(node); | |
+ bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(device_ctx.connection_boxes, i)); | |
} | |
- // select an inode near the center of the bounding box for each segment type | |
- inodes_for_segment->clear(); | |
- inodes_for_segment->resize(num_segments); | |
- for (auto& grid : *inodes_for_segment) { | |
- for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { | |
- for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { | |
- grid.point[y][x].samples = std::vector<ssize_t>(); | |
- } | |
- } | |
+ // initialize counts | |
+ for (int seg = 0; seg < num_segments; seg++) { | |
+ const auto& box = bounding_box_for_segment[seg]; | |
+ segment_counts[seg] = vtr::Matrix<int>({size_t(box.width()), size_t(box.height())}, 0); | |
} | |
+ // count sample points | |
for (size_t i = 0; i < rr_nodes.size(); i++) { | |
auto& node = rr_nodes[i]; | |
- vtr::Rect<int> node_bounds = bounding_box_for_node(node); | |
if (node.type() != CHANX && node.type() != CHANY) continue; | |
- if (node.capacity() == 0 || device_ctx.connection_boxes.find_canonical_loc(i) == nullptr) continue; | |
+ if (node.capacity() == 0 || node.num_edges() == 0) continue; | |
+ const std::pair<size_t, size_t>* loc = device_ctx.connection_boxes.find_canonical_loc(i); | |
+ if (loc == nullptr) continue; | |
int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; | |
+ segment_counts[seg_index][loc->first][loc->second] += 1; | |
VTR_ASSERT(seg_index != OPEN); | |
VTR_ASSERT(seg_index < num_segments); | |
+ } | |
- auto& grid = (*inodes_for_segment)[seg_index]; | |
- for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { | |
- for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { | |
- auto& point = grid.point[sy][sx]; | |
- if (point.samples.empty()) { | |
- point.samples.push_back(i); | |
- point.location = vtr::Point<int>(node.xlow(), node.ylow()); | |
- goto next_rr_node; | |
+ // select sample points | |
+ for (int i = 0; i < num_segments; i++) { | |
+ const auto& counts = segment_counts[i]; | |
+ const auto& bounding_box = bounding_box_for_segment[i]; | |
+ if (bounding_box.empty()) continue; | |
+ for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { | |
+ for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { | |
+ vtr::Rect<int> window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE); | |
+ auto histogram = count_histogram(window, segment_counts[i]); | |
+ SampleRegion region = { | |
+ /* .segment_type = */ i, | |
+ /* .grid_location = */ vtr::Point<int>(x, y), | |
+ /* .points = */ choose_points(counts, window, quantile(histogram, kSamplingCountLowerQuantile), quantile(histogram, kSamplingCountUpperQuantile)), | |
+ /* .order = */ 0}; | |
+ if (!region.points.empty()) { | |
+ /* In order to improve caching, the list of sample points are | |
+ * sorted to keep points that are nearby on the Euclidean plane also | |
+ * nearby in the vector of sample points. | |
+ * | |
+ * This means subsequent expansions on the same thread are likely | |
+ * to cover a similar set of nodes, so they are more likely to be | |
+ * cached. This improves performance by about 7%, which isn't a lot, | |
+ * but not a bad improvement for a few lines of code. */ | |
+ vtr::Point<int> location = region.points[0].location; | |
+ | |
+ // interleave bits of X and Y for a Z-curve ordering. | |
+ region.order = interleave(location.x()) | (interleave(location.y()) << 1); | |
+ | |
+ sample_regions.push_back(region); | |
} | |
+ } | |
+ } | |
+ } | |
- if (node_bounds.contains(point.location)) { | |
- point.samples.push_back(i); | |
- goto next_rr_node; | |
- } | |
+ // sort regions | |
+ std::sort(sample_regions.begin(), sample_regions.end(), | |
+ [](const SampleRegion& a, const SampleRegion& b) { | |
+ return a.order < b.order; | |
+ }); | |
- vtr::Point<int> target = sample(bounding_box_for_segment[seg_index], sx + 1, sy + 1, SAMPLE_GRID_SIZE + 1); | |
- int distance_new = manhattan_distance(point_for_node(node), target); | |
- int distance_stored = manhattan_distance(point.location, target); | |
- if (distance_new < distance_stored) { | |
- point.samples.clear(); | |
- point.samples.push_back(i); | |
- goto next_rr_node; | |
- } | |
- } | |
+ // build an index of sample points on segment type and location | |
+ std::map<std::tuple<int, int, int>, SamplePoint*> sample_point_index; | |
+ for (auto& region : sample_regions) { | |
+ for (auto& point : region.points) { | |
+ sample_point_index[std::make_tuple(region.segment_type, point.location.x(), point.location.y())] = &point; | |
} | |
+ } | |
+ | |
+ // collect the node indices for each segment type at the selected sample points | |
+ for (size_t i = 0; i < rr_nodes.size(); i++) { | |
+ auto& node = rr_nodes[i]; | |
+ if (node.type() != CHANX && node.type() != CHANY) continue; | |
+ if (node.capacity() == 0 || node.num_edges() == 0) continue; | |
+ const std::pair<size_t, size_t>* loc = device_ctx.connection_boxes.find_canonical_loc(i); | |
+ if (loc == nullptr) continue; | |
+ | |
+ int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; | |
+ | |
+ VTR_ASSERT(seg_index != OPEN); | |
+ VTR_ASSERT(seg_index < num_segments); | |
- // to break out from the inner loop | |
- next_rr_node: | |
- continue; | |
+ auto point = sample_point_index.find(std::make_tuple(seg_index, loc->first, loc->second)); | |
+ if (point != sample_point_index.end()) { | |
+ point->second->nodes.push_back(i); | |
+ } | |
} | |
+ | |
+ return sample_regions; | |
} | |
#ifndef VTR_ENABLE_CAPNPROTO | |
@@ -893,6 +1011,7 @@ void ConnectionBoxMapLookahead::write(const std::string& file) const { | |
static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) { | |
out->delay = in.getDelay(); | |
out->congestion = in.getCongestion(); | |
+ out->fill = in.getFill(); | |
} | |
static void FromCostEntry(VprCostEntry::Builder* out, const util::Cost_Entry& in) { | |
@@ -922,6 +1041,16 @@ static void FromMatrixCostEntry( | |
out, in, FromCostEntry); | |
} | |
+static void ToFloat(float* out, const VprFloatEntry::Reader& in) { | |
+ // Getting a scalar field is always "get<field name>()". | |
+ *out = in.getValue(); | |
+} | |
+ | |
+static void FromFloat(VprFloatEntry::Builder* out, const float& in) { | |
+ // Setting a scalar field is always "set<field name>(value)". | |
+ out->setValue(in); | |
+} | |
+ | |
void CostMap::read(const std::string& file) { | |
MmapFile f(file); | |
@@ -952,6 +1081,12 @@ void CostMap::read(const std::string& file) { | |
ToNdMatrix<2, Matrix<VprCostEntry>, vtr::NdMatrix<util::Cost_Entry, 2>>( | |
&cost_map_, cost_maps, ToMatrixCostEntry); | |
} | |
+ | |
+ { | |
+ const auto& penalty = cost_map.getPenalty(); | |
+ ToNdMatrix<2, VprFloatEntry, float>( | |
+ &penalty_, penalty, ToFloat); | |
+ } | |
} | |
void CostMap::write(const std::string& file) const { | |
@@ -978,6 +1113,12 @@ void CostMap::write(const std::string& file) const { | |
&cost_maps, cost_map_, FromMatrixCostEntry); | |
} | |
+ { | |
+ auto penalty = cost_map.initPenalty(); | |
+ FromNdMatrix<2, VprFloatEntry, float>( | |
+ &penalty, penalty_, FromFloat); | |
+ } | |
+ | |
writeMessageToFile(file, &builder); | |
} | |
#endif | |
diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h | |
index 960ef84f2..70423409f 100644 | |
--- a/vpr/src/route/connection_box_lookahead_map.h | |
+++ b/vpr/src/route/connection_box_lookahead_map.h | |
@@ -32,54 +32,27 @@ struct RoutingCostKey { | |
} | |
}; | |
-// compressed version of RoutingCostKey | |
-// TODO add bounds checks | |
-struct CompressedRoutingCostKey { | |
- uint32_t data; | |
- | |
- CompressedRoutingCostKey() { | |
- data = -1; | |
- } | |
- CompressedRoutingCostKey(const RoutingCostKey& key) { | |
- data = key.seg_index & 0xff; | |
- data <<= 8; | |
- data |= size_t(key.box_id) & 0xff; | |
- data <<= 8; | |
- data |= key.delta.x() & 0xff; | |
- data <<= 8; | |
- data |= key.delta.y() & 0xff; | |
- } | |
- | |
- bool operator==(CompressedRoutingCostKey other) const { | |
- return data == other.data; | |
- } | |
-}; | |
- | |
// hash implementation for RoutingCostKey | |
struct HashRoutingCostKey { | |
std::size_t operator()(RoutingCostKey const& key) const noexcept { | |
- uint64_t data; | |
- data = key.seg_index & 0xffff; | |
- data <<= 16; | |
- data |= size_t(key.box_id) & 0xffff; | |
- data <<= 16; | |
- data |= key.delta.x() & 0xffff; | |
- data <<= 16; | |
- data |= key.delta.y() & 0xffff; | |
- return std::hash<uint64_t>{}(data); | |
+ std::size_t hash = std::hash<int>{}(key.seg_index); | |
+ vtr::hash_combine(hash, key.box_id); | |
+ vtr::hash_combine(hash, key.delta.x()); | |
+ vtr::hash_combine(hash, key.delta.y()); | |
+ return hash; | |
} | |
}; | |
// Map used to store intermediate routing costs | |
-typedef std::unordered_map<RoutingCostKey, util::Cost_Entry, HashRoutingCostKey> RoutingCosts; | |
+typedef std::unordered_map<RoutingCostKey, float, HashRoutingCostKey> RoutingCosts; | |
// Dense cost maps per source segment and destination connection box types | |
class CostMap { | |
public: | |
void set_counts(size_t seg_count, size_t box_count); | |
int node_to_segment(int from_node_ind) const; | |
- util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y, int* out_of_bounds_val) const; | |
- void set_cost_map(const RoutingCosts& costs); | |
+ util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; | |
+ void set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs); | |
std::pair<util::Cost_Entry, int> get_nearby_cost_entry(const vtr::NdMatrix<util::Cost_Entry, 2>& matrix, int cx, int cy, const vtr::Rect<int>& bounds); | |
void read(const std::string& file); | |
void write(const std::string& file) const; | |
@@ -87,8 +60,9 @@ class CostMap { | |
std::vector<std::pair<int, int>> list_empty() const; | |
private: | |
- vtr::NdMatrix<vtr::NdMatrix<util::Cost_Entry, 2>, 2> cost_map_; | |
- vtr::NdMatrix<std::pair<int, int>, 2> offset_; | |
+ vtr::Matrix<vtr::Matrix<util::Cost_Entry>> cost_map_; | |
+ vtr::Matrix<std::pair<int, int>> offset_; | |
+ vtr::Matrix<float> penalty_; | |
std::vector<int> segment_map_; | |
size_t seg_count_; | |
size_t box_count_; | |
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp | |
index 6226662e7..081d62760 100644 | |
--- a/vpr/src/route/router_lookahead_map_utils.cpp | |
+++ b/vpr/src/route/router_lookahead_map_utils.cpp | |
@@ -5,7 +5,9 @@ | |
#include "vtr_math.h" | |
#include "route_common.h" | |
-util::PQ_Entry::PQ_Entry( | |
+namespace util { | |
+ | |
+PQ_Entry::PQ_Entry( | |
int set_rr_node_ind, | |
int switch_ind, | |
float parent_delay, | |
@@ -53,13 +55,11 @@ util::PQ_Entry::PQ_Entry( | |
util::PQ_Entry_Delay::PQ_Entry_Delay( | |
int set_rr_node_ind, | |
int switch_ind, | |
- float parent_delay, | |
- bool starting_node) { | |
+ const util::PQ_Entry_Delay* parent) { | |
this->rr_node_ind = set_rr_node_ind; | |
- auto& device_ctx = g_vpr_ctx.device(); | |
- this->delay_cost = parent_delay; | |
- if (!starting_node) { | |
+ if (parent != nullptr) { | |
+ auto& device_ctx = g_vpr_ctx.device(); | |
float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel; | |
float Rsw = device_ctx.rr_switch_inf[switch_ind].R; | |
float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C(); | |
@@ -73,23 +73,27 @@ util::PQ_Entry_Delay::PQ_Entry_Delay( | |
} | |
VTR_ASSERT(T_linear >= 0.); | |
- this->delay_cost += T_linear; | |
+ this->delay_cost = parent->delay_cost + T_linear; | |
+ } else { | |
+ this->delay_cost = 0.f; | |
} | |
} | |
util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost( | |
int set_rr_node_ind, | |
int switch_ind, | |
- float upstream_base_costs, | |
- bool starting_node) { | |
+ const util::PQ_Entry_Base_Cost* parent) { | |
this->rr_node_ind = set_rr_node_ind; | |
- auto& device_ctx = g_vpr_ctx.device(); | |
- this->base_cost = upstream_base_costs; | |
- if (!starting_node) { | |
+ if (parent != nullptr) { | |
+ auto& device_ctx = g_vpr_ctx.device(); | |
if (device_ctx.rr_switch_inf[switch_ind].configurable()) { | |
- this->base_cost += get_rr_cong_cost(set_rr_node_ind); | |
+ this->base_cost = parent->base_cost + get_rr_cong_cost(set_rr_node_ind); | |
+ } else { | |
+ this->base_cost = parent->base_cost; | |
} | |
+ } else { | |
+ this->base_cost = 0.f; | |
} | |
} | |
@@ -188,3 +192,59 @@ util::Cost_Entry util::Expansion_Cost_Entry::get_median_entry() const { | |
return representative_entry; | |
} | |
+ | |
+template<typename Entry> | |
+void expand_dijkstra_neighbours(const std::vector<t_rr_node>& rr_nodes, | |
+ const Entry& parent_entry, | |
+ std::unordered_map<int, util::Search_Path>& paths, | |
+ std::vector<bool>& node_expanded, | |
+ std::priority_queue<Entry, | |
+ std::vector<Entry>, | |
+ std::greater<Entry>>& pq) { | |
+ int parent_ind = parent_entry.rr_node_ind; | |
+ | |
+ auto& parent_node = rr_nodes[parent_ind]; | |
+ | |
+ for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) { | |
+ int child_node_ind = parent_node.edge_sink_node(iedge); | |
+ int switch_ind = parent_node.edge_switch(iedge); | |
+ | |
+ /* skip this child if it has already been expanded from */ | |
+ if (node_expanded[child_node_ind]) { | |
+ continue; | |
+ } | |
+ | |
+ Entry child_entry(child_node_ind, switch_ind, &parent_entry); | |
+ VTR_ASSERT(child_entry.cost() >= 0); | |
+ pq.push(child_entry); | |
+ | |
+ /* Create (if it doesn't exist) or update (if the new cost is lower) | |
+ * to specified node */ | |
+ Search_Path path_entry = {child_entry.cost(), parent_ind, iedge}; | |
+ auto result = paths.insert(std::make_pair( | |
+ child_node_ind, | |
+ path_entry)); | |
+ if (!result.second) { | |
+ if (child_entry.cost() < result.first->second.cost) { | |
+ result.first->second = path_entry; | |
+ } | |
+ } | |
+ } | |
+} | |
+ | |
+template void expand_dijkstra_neighbours(const std::vector<t_rr_node>& rr_nodes, | |
+ const PQ_Entry_Delay& parent_entry, | |
+ std::unordered_map<int, Search_Path>& paths, | |
+ std::vector<bool>& node_expanded, | |
+ std::priority_queue<PQ_Entry_Delay, | |
+ std::vector<PQ_Entry_Delay>, | |
+ std::greater<PQ_Entry_Delay>>& pq); | |
+template void expand_dijkstra_neighbours(const std::vector<t_rr_node>& rr_nodes, | |
+ const PQ_Entry_Base_Cost& parent_entry, | |
+ std::unordered_map<int, Search_Path>& paths, | |
+ std::vector<bool>& node_expanded, | |
+ std::priority_queue<PQ_Entry_Base_Cost, | |
+ std::vector<PQ_Entry_Base_Cost>, | |
+ std::greater<PQ_Entry_Base_Cost>>& pq); | |
+ | |
+} // namespace util | |
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h | |
index 0b39c434f..36e6ea510 100644 | |
--- a/vpr/src/route/router_lookahead_map_utils.h | |
+++ b/vpr/src/route/router_lookahead_map_utils.h | |
@@ -49,12 +49,14 @@ class Cost_Entry { | |
congestion = std::numeric_limits<float>::infinity(); | |
fill = false; | |
} | |
- Cost_Entry(float set_delay, float set_congestion) { | |
- delay = set_delay; | |
- congestion = set_congestion; | |
- fill = false; | |
- } | |
- | |
+ Cost_Entry(float set_delay, float set_congestion) | |
+ : delay(set_delay) | |
+ , congestion(set_congestion) | |
+ , fill(false) {} | |
+ Cost_Entry(float set_delay, float set_congestion, bool set_fill) | |
+ : delay(set_delay) | |
+ , congestion(set_congestion) | |
+ , fill(set_fill) {} | |
bool valid() const { | |
return std::isfinite(delay) && std::isfinite(congestion); | |
} | |
@@ -141,34 +143,43 @@ class PQ_Entry { | |
} | |
}; | |
-// A version of PQ_Entry that only calculates and stores the delay (cost.) | |
+// A version of PQ_Entry that only calculates and stores the delay. | |
class PQ_Entry_Delay { | |
public: | |
int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents | |
float delay_cost; //the cost of the path to get to this node | |
- PQ_Entry_Delay(int set_rr_node_ind, int /*switch_ind*/, float parent_cost, bool starting_node); | |
+ PQ_Entry_Delay(int set_rr_node_ind, int /*switch_ind*/, const PQ_Entry_Delay* parent); | |
float cost() const { | |
return delay_cost; | |
} | |
+ void adjust_Tsw(float amount) { | |
+ delay_cost += amount; | |
+ } | |
+ | |
bool operator>(const PQ_Entry_Delay& obj) const { | |
return (this->delay_cost > obj.delay_cost); | |
} | |
}; | |
+// A version of PQ_Entry that only calculates and stores the base cost. | |
class PQ_Entry_Base_Cost { | |
public: | |
int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents | |
float base_cost; | |
- PQ_Entry_Base_Cost(int set_rr_node_ind, int /*switch_ind*/, float parent_cost, bool starting_node); | |
+ PQ_Entry_Base_Cost(int set_rr_node_ind, int /*switch_ind*/, const PQ_Entry_Base_Cost* parent); | |
float cost() const { | |
return base_cost; | |
} | |
+ void adjust_Tsw(float /* amount */) { | |
+ // do nothing | |
+ } | |
+ | |
bool operator>(const PQ_Entry_Base_Cost& obj) const { | |
return (this->base_cost > obj.base_cost); | |
} | |
@@ -180,46 +191,16 @@ struct Search_Path { | |
int edge; | |
}; | |
-} // namespace util | |
- | |
/* iterates over the children of the specified node and selectively pushes them onto the priority queue */ | |
template<typename Entry> | |
void expand_dijkstra_neighbours(const std::vector<t_rr_node>& rr_nodes, | |
const Entry& parent_entry, | |
- std::unordered_map<int, util::Search_Path>& paths, | |
+ std::unordered_map<int, Search_Path>& paths, | |
std::vector<bool>& node_expanded, | |
std::priority_queue<Entry, | |
std::vector<Entry>, | |
- std::greater<Entry>>& pq) { | |
- int parent_ind = parent_entry.rr_node_ind; | |
+ std::greater<Entry>>& pq); | |
- auto& parent_node = rr_nodes[parent_ind]; | |
- | |
- for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) { | |
- int child_node_ind = parent_node.edge_sink_node(iedge); | |
- int switch_ind = parent_node.edge_switch(iedge); | |
- | |
- /* skip this child if it has already been expanded from */ | |
- if (node_expanded[child_node_ind]) { | |
- continue; | |
- } | |
- | |
- Entry child_entry(child_node_ind, switch_ind, parent_entry.cost(), false); | |
- VTR_ASSERT(child_entry.cost() >= 0); | |
- pq.push(child_entry); | |
- | |
- /* Create (if it doesn't exist) or update (if the new cost is lower) | |
- * to specified node */ | |
- util::Search_Path path_entry = {child_entry.cost(), parent_ind, iedge}; | |
- auto result = paths.insert(std::make_pair( | |
- child_node_ind, | |
- path_entry)); | |
- if (!result.second) { | |
- if (child_entry.cost() < result.first->second.cost) { | |
- result.first->second = path_entry; | |
- } | |
- } | |
- } | |
-} | |
+} // namespace util | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment