Skip to content

Instantly share code, notes, and snippets.

@HackerFoo
Created December 5, 2019 18:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HackerFoo/a5b4923429610b00c49e0946cd71550d to your computer and use it in GitHub Desktop.
Save HackerFoo/a5b4923429610b00c49e0946cd71550d to your computer and use it in GitHub Desktop.
diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp
index a0192a57c..30b986415 100644
--- a/libs/libvtrcapnproto/connection_map.capnp
+++ b/libs/libvtrcapnproto/connection_map.capnp
@@ -13,8 +13,13 @@ struct VprVector2D {
y @1 :Int64;
}
+struct VprFloatEntry {
+ value @0 :Float32;
+}
+
struct VprCostMap {
costMap @0 :Matrix.Matrix((Matrix.Matrix(VprCostEntry)));
offset @1 :Matrix.Matrix(VprVector2D);
segmentMap @2 :List(Int64);
+ penalty @3 :Matrix.Matrix(VprFloatEntry);
}
diff --git a/libs/libvtrutil/src/vtr_geometry.h b/libs/libvtrutil/src/vtr_geometry.h
index 38563f518..0545581b3 100644
--- a/libs/libvtrutil/src/vtr_geometry.h
+++ b/libs/libvtrutil/src/vtr_geometry.h
@@ -1,10 +1,12 @@
#ifndef VTR_GEOMETRY_H
#define VTR_GEOMETRY_H
#include "vtr_range.h"
+#include "vtr_assert.h"
#include <vector>
#include <tuple>
#include <limits>
+#include <type_traits>
namespace vtr {
@@ -78,7 +80,13 @@ class Rect {
Rect();
Rect(T left_val, T bottom_val, T right_val, T top_val);
Rect(Point<T> bottom_left_val, Point<T> top_right_val);
- Rect(Point<T> bottom_left_val, T size);
+
+ //Constructs a rectangle that only contains the given point
+ // Rect(p1).contains(p2) => p1 == p2
+ //It is only enabled for integral types, because making this work for floating point types would be difficult and brittle.
+ //The following line only enables the constructor if std::is_integral<T>::value == true
+ template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type...>
+ Rect(Point<U> point);
public: //Accessors
//Co-ordinates
@@ -103,16 +111,13 @@ class Rect {
bool coincident(Point<T> point) const;
//Returns true if no points are contained in the rectangle
+ // rect.empty() => not exists p. rect.contains(p)
+ // This also implies either the width or height is 0.
bool empty() const;
friend bool operator== <>(const Rect<T>& lhs, const Rect<T>& rhs);
friend bool operator!= <>(const Rect<T>& lhs, const Rect<T>& rhs);
- template<class U>
- friend Rect<U> operator|(const Rect<U>& lhs, const Rect<U>& rhs);
- template<class U>
- friend Rect<U>& operator|=(Rect<U>& lhs, const Rect<U>& rhs);
-
public: //Mutators
//Co-ordinates
void set_xmin(T xmin_val);
@@ -120,11 +125,27 @@ class Rect {
void set_xmax(T xmax_val);
void set_ymax(T ymax_val);
+ //Equivalent to `*this = bounding_box(*this, other)`
+ Rect<T>& expand_bounding_box(const Rect<T>& other);
+
private:
Point<T> bottom_left_;
Point<T> top_right_;
};
+//Return the smallest rectangle containing both given rectangles
+//Note that this isn't a union and the resulting rectangle may include points not in either given rectangle
+template<class T>
+Rect<T> bounding_box(const Rect<T>& lhs, const Rect<T>& rhs);
+
+//Sample on a uniformly spaced grid within a rectangle
+// sample(vtr::Rect(l, h), 0, 0, M) == l
+// sample(vtr::Rect(l, h), M, M, M) == h
+//To avoid the edges, use `sample(r, x+1, y+1, N+1) for x, y, in 0..N-1
+//Only defined for integral types
+template<typename T, typename std::enable_if<std::is_integral<T>::value>::type...>
+Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d);
+
//A 2D line
template<class T>
class Line {
diff --git a/libs/libvtrutil/src/vtr_geometry.tpp b/libs/libvtrutil/src/vtr_geometry.tpp
index 00a6decc2..5fd065b31 100644
--- a/libs/libvtrutil/src/vtr_geometry.tpp
+++ b/libs/libvtrutil/src/vtr_geometry.tpp
@@ -80,11 +80,13 @@ Rect<T>::Rect(Point<T> bottom_left_val, Point<T> top_right_val)
//pass
}
+//Only defined for integral types
template<class T>
-Rect<T>::Rect(Point<T> bottom_left_val, T size)
- : bottom_left_(bottom_left_val)
- , top_right_(bottom_left_val.x() + size,
- bottom_left_val.y() + size) {
+template<typename U, typename std::enable_if<std::is_integral<U>::value>::type...>
+Rect<T>::Rect(Point<U> point)
+ : bottom_left_(point)
+ , top_right_(point.x() + 1,
+ point.y() + 1) {
//pass
}
@@ -165,6 +167,22 @@ bool operator!=(const Rect<T>& lhs, const Rect<T>& rhs) {
return !(lhs == rhs);
}
+template<class T>
+Rect<T> bounding_box(const Rect<T>& lhs, const Rect<T>& rhs) {
+ return Rect<T>(std::min(lhs.xmin(), rhs.xmin()),
+ std::min(lhs.ymin(), rhs.ymin()),
+ std::max(lhs.xmax(), rhs.xmax()),
+ std::max(lhs.ymax(), rhs.ymax()));
+}
+
+//Only defined for integral types
+template<typename T, typename std::enable_if<std::is_integral<T>::value>::type...>
+Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d) {
+ VTR_ASSERT(d > 0);
+ return Point<T>((r.xmin() * (d - x) + r.xmax() * x + d / 2) / d,
+ (r.ymin() * (d - y) + r.ymax() * y + d / 2) / d);
+}
+
template<class T>
void Rect<T>::set_xmin(T xmin_val) {
bottom_left_.set_x(xmin_val);
@@ -186,16 +204,9 @@ void Rect<T>::set_ymax(T ymax_val) {
}
template<class T>
-Rect<T> operator|(const Rect<T>& lhs, const Rect<T>& rhs) {
- return Rect<T>(std::min(lhs.xmin(), rhs.xmin()),
- std::min(lhs.ymin(), rhs.ymin()),
- std::max(lhs.xmax(), rhs.xmax()),
- std::max(lhs.ymax(), rhs.ymax()));
-}
-
-template<class T>
-Rect<T>& operator|=(Rect<T>& lhs, const Rect<T>& rhs) {
- return lhs = lhs | rhs;
+Rect<T>& Rect<T>::expand_bounding_box(const Rect<T>& other) {
+ *this = bounding_box(*this, other);
+ return *this;
}
/*
diff --git a/libs/libvtrutil/test/test_geometry.cpp b/libs/libvtrutil/test/test_geometry.cpp
index d005a4e26..8d3d91f44 100644
--- a/libs/libvtrutil/test/test_geometry.cpp
+++ b/libs/libvtrutil/test/test_geometry.cpp
@@ -61,6 +61,7 @@ TEST_CASE("Rect", "[vtr_geometry/Rect]") {
REQUIRE(r2.contains({6, 4}));
REQUIRE_FALSE(r2.contains({100, 4}));
REQUIRE_FALSE(r2.contains(pi_2));
+ REQUIRE(vtr::Rect<int>(pi_1).contains(pi_1));
}
SECTION("strictly_contains_int") {
@@ -78,7 +79,19 @@ TEST_CASE("Rect", "[vtr_geometry/Rect]") {
}
SECTION("bounds_int") {
- REQUIRE(r1 == (r3 | r4));
+ REQUIRE(r1 == bounding_box(r3, r4));
+ }
+
+ SECTION("empty_int") {
+ REQUIRE(vtr::Rect<int>().empty());
+ }
+
+ SECTION("sample_int") {
+ auto r = vtr::Rect<int>(pi_1, pi_2);
+ REQUIRE(sample(r, 0, 0, 17) == pi_1);
+ REQUIRE(sample(r, 17, 17, 17) == pi_2);
+ auto inside = sample(r, 3, 11, 17);
+ REQUIRE(r.contains(inside));
}
}
@@ -92,6 +105,7 @@ TEST_CASE("Rect", "[vtr_geometry/Rect]") {
vtr::Rect<float> r4(pf_1, pf_2);
vtr::Rect<float> r5(pf_1, pf_3);
vtr::Rect<float> r6(pf_3, pf_2);
+ // vtr::Rect<float> r7(pf_1); // <-- will fail to compile
SECTION("equality_float") {
REQUIRE(r3 == r4);
@@ -140,7 +154,11 @@ TEST_CASE("Rect", "[vtr_geometry/Rect]") {
}
SECTION("bounds_float") {
- REQUIRE(r3 == (r5 | r6));
+ REQUIRE(r3 == bounding_box(r5, r6));
+ }
+
+ SECTION("empty_float") {
+ REQUIRE(vtr::Rect<float>().empty());
}
}
}
diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp
index 481d83aac..a6341de4f 100644
--- a/vpr/src/route/connection_box_lookahead_map.cpp
+++ b/vpr/src/route/connection_box_lookahead_map.cpp
@@ -38,101 +38,85 @@
*
* See e_representative_entry_method */
#define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST
-// #define FILL_LIMIT 30
#define CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS
// Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE
static constexpr int SAMPLE_GRID_SIZE = 3;
+// Don't continue storing a path after hitting a lower-or-same cost entry.
+static constexpr bool BREAK_ON_MISS = false;
+
// Distance penalties filling are calculated based on available samples, but can be adjusted with this factor.
static constexpr float PENALTY_FACTOR = 1.f;
+static constexpr float PENALTY_MIN = 1e-12f;
+
+static constexpr int MIN_PATH_COUNT = 1000;
+
+// quantiles (like percentiles but 0-1) of segment count to use as a selection criteria
+// choose locations with higher, but not extreme, counts of each segment type
+static constexpr double kSamplingCountLowerQuantile = 0.5;
+static constexpr double kSamplingCountUpperQuantile = 0.7;
// a sample point for a segment type, contains all segments at the VPR location
struct SamplePoint {
- uint64_t order; // used to order sample points
+ // canonical location
vtr::Point<int> location;
- std::vector<ssize_t> samples;
- SamplePoint()
- : location(0, 0) {}
-};
-// a grid of sample points
-struct SampleGrid {
- SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE];
+ // nodes to expand
+ std::vector<ssize_t> nodes;
};
-// implements a simple cache of key(K)/value(V) pairs of N entries
-template<class K, class V, int N>
-class SimpleCache {
- public:
- SimpleCache()
- : pos(0)
- , hits(0)
- , misses(0) {}
-
- // O(N) lookup
- bool get(K key, V* value) {
- for (int i = 0; i < N; i++) {
- auto& k = keys[i];
- if (k == key) {
- auto& v = values[i];
-#if defined(CONNECTION_BOX_LOOKAHEAD_PUSH_BACK_HITS)
- // preserve the found key by pushing it back
- int last = (pos + N - 1) % N;
- std::swap(k, keys[last]);
- std::swap(v, values[last]);
-#endif
- *value = v;
- hits++;
- return true;
- }
- }
- misses++;
- return false;
- }
+struct SampleRegion {
+ // all nodes in `points' have this segment type
+ int segment_type;
- // O(1) insertion (overwriting an older entry)
- void insert(K key, V val) {
- keys[pos] = key;
- values[pos] = val;
- pos = (pos + 1) % N;
- }
+ // location on the sample grid
+ vtr::Point<int> grid_location;
- // ratio of successful lookups
- float hit_ratio() {
- return hits ? static_cast<float>(hits) / static_cast<float>(hits + misses) : 0.f;
- }
+ // locations to try
+ // The computation will keep expanding each of the points
+ // until a number of paths (segment -> connection box) are found.
+ std::vector<SamplePoint> points;
- // ratio of unsuccessful lookups
- float miss_ratio() {
- return misses ? static_cast<float>(misses) / static_cast<float>(hits + misses) : 0.f;
- }
-
- private:
- std::array<K, N> keys; // keep keys together for faster scanning
- std::array<V, N> values;
- size_t pos;
- uint64_t hits;
- uint64_t misses;
+ // used to sort the regions to improve caching
+ uint64_t order;
};
template<typename Entry>
-static void run_dijkstra(int start_node_ind,
- RoutingCosts* routing_costs);
-static void find_inodes_for_segment_types(std::vector<SampleGrid>* inodes_for_segment);
+static std::pair<float, int> run_dijkstra(int start_node_ind,
+ RoutingCosts* routing_costs);
+
+static std::vector<SampleRegion> find_sample_regions(int num_segments);
// also known as the L1 norm
static int manhattan_distance(const vtr::Point<int>& a, const vtr::Point<int>& b) {
return abs(b.x() - a.x()) + abs(b.y() - a.y());
}
+template<class T>
+constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
+ return std::min(std::max(v, lo), hi);
+}
+
+template<typename T>
+static vtr::Point<T> closest_point_in_rect(const vtr::Rect<T>& r, const vtr::Point<T>& p) {
+ if (r.empty()) {
+ return vtr::Point<T>(0, 0);
+ } else {
+ return vtr::Point<T>(clamp<T>(p.x(), r.xmin(), r.xmax() - 1),
+ clamp<T>(p.y(), r.ymin(), r.ymax() - 1));
+ }
+}
+
// resize internal data structures
void CostMap::set_counts(size_t seg_count, size_t box_count) {
cost_map_.clear();
offset_.clear();
+ penalty_.clear();
cost_map_.resize({seg_count, box_count});
offset_.resize({seg_count, box_count});
+ penalty_.resize({seg_count, box_count});
seg_count_ = seg_count;
box_count_ = box_count;
@@ -153,57 +137,39 @@ int CostMap::node_to_segment(int from_node_ind) const {
return segment_map_[from_node_ind];
}
+static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) {
+ penalty = std::max(penalty, PENALTY_MIN);
+ return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR,
+ entry.congestion, entry.fill);
+}
+
// get a cost entry for a segment type, connection box type, and offset
-util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y, int* out_of_bounds_val) const {
+util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const {
VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size());
const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)];
if (cost_map.dim_size(0) == 0 || cost_map.dim_size(1) == 0) {
return util::Cost_Entry();
}
- int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first;
- int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second;
-
- int out_of_bounds_distance = 0;
- if (dx < 0) {
- out_of_bounds_distance += abs(dx);
- dx = 0;
- }
- if (dy < 0) {
- out_of_bounds_distance += abs(dy);
- dy = 0;
- }
-
- if (dx >= (ssize_t)cost_map.dim_size(0)) {
- out_of_bounds_distance += abs(int(dx) - int(cost_map.dim_size(0) - 1));
- dx = cost_map.dim_size(0) - 1;
- }
- if (dy >= (ssize_t)cost_map.dim_size(1)) {
- out_of_bounds_distance += abs(int(dy) - int(cost_map.dim_size(1) - 1));
- dy = cost_map.dim_size(1) - 1;
- }
-
- if (out_of_bounds_val != nullptr) {
- *out_of_bounds_val = out_of_bounds_distance;
- }
-
- return cost_map_[from_seg_index][size_t(box_id)][dx][dy];
-}
-
-static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) {
- auto out = util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR,
- entry.congestion);
-
- out.fill = entry.fill;
- return out;
+ vtr::Point<int> coord(delta_x - offset_[from_seg_index][size_t(box_id)].first,
+ delta_y - offset_[from_seg_index][size_t(box_id)].second);
+ vtr::Rect<int> bounds(0, 0, cost_map.dim_size(0), cost_map.dim_size(1));
+ auto closest = closest_point_in_rect(bounds, coord);
+ auto cost = cost_map_[from_seg_index][size_t(box_id)][closest.x()][closest.y()];
+ float penalty = penalty_[from_seg_index][size_t(box_id)];
+ auto distance = manhattan_distance(closest, coord);
+ return penalize(cost, distance, penalty);
}
// set the cost map for a segment type and connection box type, filling holes
-void CostMap::set_cost_map(const RoutingCosts& costs) {
+void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs) {
// calculate the bounding boxes
vtr::Matrix<vtr::Rect<int>> bounds({seg_count_, box_count_});
- for (const auto& entry : costs) {
- bounds[entry.first.seg_index][size_t(entry.first.box_id)] |= vtr::Rect<int>(entry.first.delta, 1);
+ for (const auto& entry : delay_costs) {
+ bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect<int>(entry.first.delta));
+ }
+ for (const auto& entry : base_costs) {
+ bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect<int>(entry.first.delta));
}
// store bounds
@@ -225,18 +191,27 @@ void CostMap::set_cost_map(const RoutingCosts& costs) {
}
// store entries into the matrices
- for (const auto& entry : costs) {
+ for (const auto& entry : delay_costs) {
+ int seg = entry.first.seg_index;
+ int box = size_t(entry.first.box_id);
+ const auto& seg_box_bounds = bounds[seg][box];
+ int x = entry.first.delta.x() - seg_box_bounds.xmin();
+ int y = entry.first.delta.y() - seg_box_bounds.ymin();
+ cost_map_[seg][box][x][y].delay = entry.second;
+ }
+ for (const auto& entry : base_costs) {
int seg = entry.first.seg_index;
int box = size_t(entry.first.box_id);
const auto& seg_box_bounds = bounds[seg][box];
int x = entry.first.delta.x() - seg_box_bounds.xmin();
int y = entry.first.delta.y() - seg_box_bounds.ymin();
- cost_map_[seg][box][x][y] = entry.second;
+ cost_map_[seg][box][x][y].congestion = entry.second;
}
// fill the holes
for (size_t seg = 0; seg < seg_count_; seg++) {
for (size_t box = 0; box < box_count_; box++) {
+ penalty_[seg][box] = std::numeric_limits<float>::infinity();
const auto& seg_box_bounds = bounds[seg][box];
if (seg_box_bounds.empty()) {
continue;
@@ -244,7 +219,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) {
auto& matrix = cost_map_[seg][box];
// calculate delay penalty
- float min_delay = 0.f, max_delay = 0.f;
+ float min_delay = std::numeric_limits<float>::infinity(), max_delay = 0.f;
vtr::Point<int> min_location(0, 0), max_location(0, 0);
for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) {
for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) {
@@ -262,6 +237,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) {
}
}
float delay_penalty = (max_delay - min_delay) / static_cast<float>(std::max(1, manhattan_distance(max_location, min_location)));
+ penalty_[seg][box] = delay_penalty;
// find missing cost entries and fill them in by copying a nearby cost entry
std::vector<std::tuple<unsigned, unsigned, util::Cost_Entry>> missing;
@@ -284,11 +260,10 @@ void CostMap::set_cost_map(const RoutingCosts& costs) {
}
}
}
-#if !defined(FILL_LIMIT)
if (couldnt_fill) {
+ // give up trying to fill an empty matrix
break;
}
-#endif
}
if (!couldnt_fill) {
@@ -303,13 +278,11 @@ void CostMap::set_cost_map(const RoutingCosts& costs) {
if (couldnt_fill) {
VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld, %d x %d bounding box\n",
seg, box, seg_box_bounds.width(), seg_box_bounds.height());
-#if !defined(FILL_LIMIT)
for (unsigned y = 0; y < matrix.dim_size(1); y++) {
for (unsigned x = 0; x < matrix.dim_size(0); x++) {
VTR_ASSERT(!matrix[x][y].valid());
}
}
-#endif
}
}
}
@@ -325,11 +298,11 @@ void CostMap::print(int iseg) const {
box_id < device_ctx.connection_boxes.num_connection_box_types();
box_id++) {
auto& matrix = cost_map_[iseg][box_id];
- VTR_LOG("cost for box_id = %lu (%zu, %zu)\n", box_id, matrix.dim_size(0), matrix.dim_size(1));
if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) {
VTR_LOG("cost EMPTY for box_id = %lu\n", box_id);
continue;
}
+ VTR_LOG("cost for box_id = %lu (%zu, %zu)\n", box_id, matrix.dim_size(0), matrix.dim_size(1));
double sum = 0.0;
for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) {
for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) {
@@ -387,46 +360,40 @@ std::pair<util::Cost_Entry, int> CostMap::get_nearby_cost_entry(const vtr::NdMat
int cy,
const vtr::Rect<int>& bounds) {
// spiral around (cx, cy) looking for a nearby entry
- int n = 1;
- bool in_bounds;
- util::Cost_Entry entry;
- entry.fill = true;
+ bool in_bounds = bounds.contains(vtr::Point<int>(cx, cy));
+ if (!in_bounds) {
+ return std::make_pair(util::Cost_Entry(), 0);
+ }
+ int n = 0;
+ util::Cost_Entry fill(matrix[cx][cy]);
+ fill.fill = true;
- do {
+ while (in_bounds && !fill.valid()) {
+ n++;
in_bounds = false;
+ util::Cost_Entry min_entry;
for (int ox = -n; ox <= n; ox++) {
int x = cx + ox;
int oy = n - abs(ox);
int yp = cy + oy;
int yn = cy - oy;
if (bounds.contains(vtr::Point<int>(x, yp))) {
- assign_min_entry(&entry, matrix[x][yp]);
+ assign_min_entry(&min_entry, matrix[x][yp]);
in_bounds = true;
}
if (bounds.contains(vtr::Point<int>(x, yn))) {
- assign_min_entry(&entry, matrix[x][yn]);
+ assign_min_entry(&min_entry, matrix[x][yn]);
in_bounds = true;
}
}
- if (entry.valid()) return std::make_pair(entry, n);
- n++;
-#if defined(FILL_LIMIT)
- if (n > FILL_LIMIT) {
- break;
+ if (!std::isfinite(fill.delay)) {
+ fill.delay = min_entry.delay;
}
-#endif
- } while (in_bounds);
- return std::make_pair(util::Cost_Entry(), n);
-}
-
-// sample on a uniformly spaced grid within a rectangle
-// sample(vtr::Rect(N), 0, 0, M) is vtr::Point(0, 0)
-// sample(vtr::Rect(N), M, M, M) is vtr::Point(N, N)
-// To avoid the edges, use `sample(r, x+1, y+1, N+1) for x, y, in 0..N-1
-template<typename T>
-vtr::Point<T> sample(const vtr::Rect<T>& r, T x, T y, T d) {
- return vtr::Point<T>((r.xmin() * (d - x) + r.xmax() * x + d / 2) / d,
- (r.ymin() * (d - y) + r.ymax() * y + d / 2) / d);
+ if (!std::isfinite(fill.congestion)) {
+ fill.congestion = min_entry.congestion;
+ }
+ }
+ return std::make_pair(fill, n);
}
// derive a cost from the map between two nodes
@@ -479,7 +446,7 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind,
ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second);
int from_seg_index = cost_map_.node_to_segment(from_node_ind);
- util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, box_id, dx, dy, nullptr);
+ util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, box_id, dx, dy);
if (!cost_entry.valid()) {
// there is no route
@@ -521,51 +488,41 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind,
}
// add a best cost routing path from start_node_ind to node_ind to routing costs
-static void add_paths(int start_node_ind,
- int node_ind,
+template<typename Entry>
+static bool add_paths(int start_node_ind,
+ Entry current,
std::unordered_map<int, util::Search_Path>* paths,
RoutingCosts* routing_costs) {
auto& device_ctx = g_vpr_ctx.device();
ConnectionBoxId box_id;
std::pair<size_t, size_t> box_location;
float site_pin_delay;
+ int node_ind = current.rr_node_ind;
bool found = device_ctx.connection_boxes.find_connection_box(
node_ind, &box_id, &box_location, &site_pin_delay);
+ bool new_sample_found = false;
if (!found) {
VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind);
}
// reconstruct the path
std::vector<int> path;
- for (int i = node_ind; i != start_node_ind; path.push_back(i = (*paths)[i].parent))
- ;
- util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true, /*Tsw_adjust=*/0.f);
-
- // recalculate the path with congestion
- util::PQ_Entry current_full = parent_entry;
- int parent = start_node_ind;
- for (auto it = path.rbegin(); it != path.rend(); it++) {
- auto& parent_node = device_ctx.rr_nodes[parent];
- float Tsw_adjust = 0.f;
-
- // Remove site pin delay when taking edge from last channel to IPIN.
- if (*it == node_ind) {
- Tsw_adjust = -site_pin_delay;
- }
- current_full = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), current_full.delay,
- current_full.R_upstream, current_full.congestion_upstream, false, Tsw_adjust);
- parent = *it;
+ for (int i = (*paths)[node_ind].parent; i != start_node_ind; i = (*paths)[i].parent) {
+ path.push_back(i);
}
+ path.push_back(start_node_ind);
+
+ current.adjust_Tsw(-site_pin_delay);
// add each node along the path subtracting the incremental costs from the current costs
- parent = start_node_ind;
+ Entry start_to_here(start_node_ind, UNDEFINED, nullptr);
+ int parent = start_node_ind;
for (auto it = path.rbegin(); it != path.rend(); it++) {
- auto& parent_node = device_ctx.rr_nodes[parent];
- int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index;
- const std::pair<size_t, size_t>* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent);
+ auto& here = device_ctx.rr_nodes[*it];
+ int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index;
+ const std::pair<size_t, size_t>* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(*it);
if (from_canonical_loc == nullptr) {
- VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d",
- parent);
+ VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", *it);
}
vtr::Point<int> delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first),
@@ -574,36 +531,52 @@ static void add_paths(int start_node_ind,
seg_index,
box_id,
delta};
- CompressedRoutingCostKey compressed_key(key);
- float new_delay = current_full.delay - parent_entry.delay;
- float new_congestion = current_full.congestion_upstream - parent_entry.congestion_upstream;
+ if (*it != start_node_ind) {
+ auto& parent_node = device_ctx.rr_nodes[parent];
+ start_to_here = Entry(*it, parent_node.edge_switch((*paths)[*it].edge), &start_to_here);
+ parent = *it;
+ }
- VTR_ASSERT(new_delay >= 0.f);
- VTR_ASSERT(new_congestion >= 0.f);
+ float cost = current.cost() - start_to_here.cost();
+ if (cost < 0.f && cost > -10e-15 /* 10 femtosecond */) {
+ cost = 0.f;
+ }
- auto val = util::Cost_Entry(new_delay, new_congestion);
+ VTR_ASSERT(cost >= 0.f);
// NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST
- auto result = routing_costs->insert(std::make_pair(key, val));
+ auto result = routing_costs->insert(std::make_pair(key, cost));
if (!result.second) {
- assign_min_entry(&result.first->second, val);
+ if (cost < result.first->second) {
+ result.first->second = cost;
+ new_sample_found = true;
+ } else if (BREAK_ON_MISS) {
+ break;
+ }
+ } else {
+ new_sample_found = true;
}
-
- // update parent data
- parent_entry = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), parent_entry.delay,
- parent_entry.R_upstream, parent_entry.congestion_upstream, false, /*Tsw_adjust=*/0.f);
- parent = *it;
}
+ return new_sample_found;
}
-/* runs Dijkstra's algorithm from specified node until all nodes have been
+/* Runs Dijkstra's algorithm from specified node until all nodes have been
* visited. Each time a pin is visited, the delay/congestion information
- * to that pin is stored to an entry in the routing_cost_map */
+ * to that pin is stored to an entry in the routing_cost_map.
+ *
+ * Returns the maximum (last) minimum cost path stored, and
+ * the number of paths from start_node_ind stored. */
template<typename Entry>
-static void run_dijkstra(int start_node_ind,
- RoutingCosts* routing_costs) {
+static std::pair<float, int> run_dijkstra(int start_node_ind,
+ RoutingCosts* routing_costs) {
auto& device_ctx = g_vpr_ctx.device();
+ int path_count = 0;
+ const std::pair<size_t, size_t>* start_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind);
+ if (start_canonical_loc == nullptr) {
+ VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d",
+ start_node_ind);
+ }
/* a list of boolean flags (one for each rr node) to figure out if a
* certain node has already been expanded */
@@ -617,7 +590,8 @@ static void run_dijkstra(int start_node_ind,
std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> pq;
/* first entry has no upstream delay or congestion */
- Entry first_entry(start_node_ind, UNDEFINED, 0, true);
+ Entry first_entry(start_node_ind, UNDEFINED, nullptr);
+ float max_cost = first_entry.cost();
pq.push(first_entry);
@@ -635,13 +609,18 @@ static void run_dijkstra(int start_node_ind,
/* if this node is an ipin record its congestion/delay in the routing_cost_map */
if (device_ctx.rr_nodes[node_ind].type() == IPIN) {
- add_paths(start_node_ind, node_ind, &paths, routing_costs);
+ // the last cost should be the highest
+ max_cost = current.cost();
+
+ path_count++;
+ add_paths<Entry>(start_node_ind, current, &paths, routing_costs);
} else {
- expand_dijkstra_neighbours(device_ctx.rr_nodes,
- current, paths, node_expanded, pq);
+ util::expand_dijkstra_neighbours(device_ctx.rr_nodes,
+ current, paths, node_expanded, pq);
node_expanded[node_ind] = true;
}
}
+ return std::make_pair(max_cost, path_count);
}
// compute the cost maps for lookahead
@@ -652,30 +631,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector<t_segment_inf>& segmen
alloc_and_load_rr_node_route_structs();
size_t num_segments = segment_inf.size();
- std::vector<SamplePoint> sample_points;
- {
- std::vector<SampleGrid> inodes_for_segment(num_segments);
- find_inodes_for_segment_types(&inodes_for_segment);
-
- // collapse into a vector
- for (auto& grid : inodes_for_segment) {
- for (int y = 0; y < SAMPLE_GRID_SIZE; y++) {
- for (int x = 0; x < SAMPLE_GRID_SIZE; x++) {
- auto& point = grid.point[y][x];
- if (!point.samples.empty()) {
- point.order = point.samples[0];
- sample_points.push_back(point);
- }
- }
- }
- }
- }
-
- // sort by VPR coordinate
- std::sort(sample_points.begin(), sample_points.end(),
- [](const SamplePoint& a, const SamplePoint& b) {
- return a.order < b.order;
- });
+ std::vector<SampleRegion> sample_regions = find_sample_regions(num_segments);
/* free previous delay map and allocate new one */
auto& device_ctx = g_vpr_ctx.device();
@@ -683,42 +639,87 @@ void ConnectionBoxMapLookahead::compute(const std::vector<t_segment_inf>& segmen
device_ctx.connection_boxes.num_connection_box_types());
VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST);
- RoutingCosts all_costs;
+ RoutingCosts all_delay_costs;
+ RoutingCosts all_base_costs;
/* run Dijkstra's algorithm for each segment type & channel type combination */
#if defined(VPR_USE_TBB)
tbb::mutex all_costs_mutex;
- tbb::parallel_for_each(sample_points, [&](const SamplePoint& point) {
+ tbb::parallel_for_each(sample_regions, [&](const SampleRegion& region) {
#else
- for (const auto& point : sample_points) {
+ for (const auto& region : sample_regions) {
#endif
// holds the cost entries for a run
- RoutingCosts costs;
+ RoutingCosts delay_costs;
+ RoutingCosts base_costs;
+ int total_path_count = 0;
+
+ for (auto& point : region.points) {
+ // statistics
+ vtr::Timer run_timer;
+ float max_delay_cost = 0.f;
+ float max_base_cost = 0.f;
+ int path_count = 0;
+ for (auto node_ind : point.nodes) {
+ {
+ auto result = run_dijkstra<util::PQ_Entry_Delay>(node_ind, &delay_costs);
+ max_delay_cost = std::max(max_delay_cost, result.first);
+ path_count += result.second;
+ }
+ {
+ auto result = run_dijkstra<util::PQ_Entry_Base_Cost>(node_ind, &base_costs);
+ max_base_cost = std::max(max_base_cost, result.first);
+ path_count += result.second;
+ }
+ }
- for (auto node_ind : point.samples) {
- run_dijkstra<util::PQ_Entry_Delay>(node_ind, &costs);
- run_dijkstra<util::PQ_Entry_Base_Cost>(node_ind, &costs);
+ if (path_count > 0) {
+ VTR_LOG_WARN("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n",
+ path_count, segment_inf[region.segment_type].name.c_str(), region.segment_type,
+ point.location.x(), point.location.y(),
+ (int)point.nodes.size(),
+ max_delay_cost, max_base_cost,
+ path_count / run_timer.elapsed_sec());
+ }
+
+ /*
+ * if (path_count == 0) {
+ * for (auto node_ind : point.nodes) {
+ * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str());
+ * }
+ * }
+ */
+
+ total_path_count += path_count;
+ if (total_path_count > MIN_PATH_COUNT) {
+ break;
+ }
}
#if defined(VPR_USE_TBB)
all_costs_mutex.lock();
#endif
- for (auto node_ind : point.samples) {
- VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str());
+ if (total_path_count == 0) {
+ VTR_LOG("No paths found for sample region %s(%d, %d)\n",
+ segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y());
}
- VTR_LOG("Expanded sample point (%d, %d)\n",
- point.location.x(), point.location.y());
-
// combine the cost map from this run with the final cost maps for each segment
- for (const auto& cost : costs) {
- const auto& key = cost.first;
+ for (const auto& cost : delay_costs) {
const auto& val = cost.second;
- auto result = all_costs.insert(std::make_pair(key, val));
+ auto result = all_delay_costs.insert(std::make_pair(cost.first, val));
if (!result.second) {
// implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST
- assign_min_entry(&result.first->second, val);
+ result.first->second = std::min(result.first->second, val);
+ }
+ }
+ for (const auto& cost : base_costs) {
+ const auto& val = cost.second;
+ auto result = all_base_costs.insert(std::make_pair(cost.first, val));
+ if (!result.second) {
+ // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST
+ result.first->second = std::min(result.first->second, val);
}
}
@@ -732,7 +733,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector<t_segment_inf>& segmen
VTR_LOG("Combining results\n");
/* boil down the cost list in routing_cost_map at each coordinate to a
* representative cost entry and store it in the lookahead cost map */
- cost_map_.set_cost_map(all_costs);
+ cost_map_.set_cost_map(all_delay_costs, all_base_costs);
// diagnostics
#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_ENTRIES)
@@ -785,91 +786,208 @@ float ConnectionBoxMapLookahead::get_expected_cost(
}
// the smallest bounding box containing a node
-static vtr::Rect<int> bounding_box_for_node(const t_rr_node& node) {
- return vtr::Rect<int>(node.xlow(), node.ylow(),
- node.xhigh() + 1, node.yhigh() + 1);
+static vtr::Rect<int> bounding_box_for_node(const ConnectionBoxes& connection_boxes, int node_ind) {
+ const std::pair<size_t, size_t>* loc = connection_boxes.find_canonical_loc(node_ind);
+ if (loc == nullptr) {
+ return vtr::Rect<int>();
+ } else {
+ return vtr::Rect<int>(vtr::Point<int>(loc->first, loc->second));
+ }
+}
+
+static vtr::Rect<int> sample_window(const vtr::Rect<int>& bounding_box, int sx, int sy, int n) {
+ return vtr::Rect<int>(sample(bounding_box, sx, sy, n),
+ sample(bounding_box, sx + 1, sy + 1, n));
+}
+
+static std::vector<SamplePoint> choose_points(const vtr::Matrix<int>& counts,
+ const vtr::Rect<int>& window,
+ int min_count,
+ int max_count) {
+ VTR_ASSERT(min_count <= max_count);
+ std::vector<SamplePoint> points;
+ for (int y = window.ymin(); y < window.ymax(); y++) {
+ for (int x = window.xmin(); x < window.xmax(); x++) {
+ if (counts[x][y] >= min_count && counts[x][y] <= max_count) {
+ points.push_back(SamplePoint{/* .location = */ vtr::Point<int>(x, y),
+ /* .nodes = */ {}});
+ }
+ }
+ }
+
+ vtr::Point<int> center = sample(window, 1, 1, 2);
+
+ // sort by distance from center
+ std::sort(points.begin(), points.end(),
+ [&](const SamplePoint& a, const SamplePoint& b) {
+ return manhattan_distance(a.location, center) < manhattan_distance(b.location, center);
+ });
+
+ return points;
+}
+
+// histogram is a map from segment count to number of locations having that count
+static int quantile(const std::map<int, int>& histogram, float ratio) {
+ if (histogram.empty()) {
+ return 0;
+ }
+ int sum = 0;
+ for (const auto& entry : histogram) {
+ sum += entry.second;
+ }
+ int limit = std::ceil(sum * ratio);
+ for (const auto& entry : histogram) {
+ limit -= entry.second;
+ if (limit <= 0) {
+ return entry.first;
+ }
+ }
+ return 0;
+}
+
+// select a good number of segments to find
+static std::map<int, int> count_histogram(const vtr::Rect<int>& box, const vtr::Matrix<int>& counts) {
+ std::map<int, int> histogram;
+ for (int y = box.ymin(); y < box.ymax(); y++) {
+ for (int x = box.xmin(); x < box.xmax(); x++) {
+ int count = counts[x][y];
+ if (count > 0) {
+ ++histogram[count];
+ }
+ }
+ }
+ return histogram;
}
-// the center point for a node
-// it is unknown where the the node starts, so use the average
-static vtr::Point<int> point_for_node(const t_rr_node& node) {
- int x = (node.xhigh() + node.xlow()) / 2;
- int y = (node.yhigh() + node.ylow()) / 2;
- return vtr::Point<int>(x, y);
+// Used to calculate each region's `order.'
+// A space-filling curve will order the regions so that
+// nearby points stay close in order. A Hilbert curve might
+// be better, but a Morton (Z)-order curve is easy to compute,
+// because it's just interleaving binary bits, so this
+// function interleaves with 0's so that the X and Y
+// dimensions can then be OR'ed together.
+static uint64_t interleave(uint32_t x) {
+ uint64_t i = x;
+ i = (i ^ (i << 16)) & 0x0000ffff0000ffff;
+ i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff;
+ i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f;
+ i = (i ^ (i << 2)) & 0x3333333333333333;
+ i = (i ^ (i << 1)) & 0x5555555555555555;
+ return i;
}
// for each segment type, find the nearest nodes to an equally spaced grid of points
// within the bounding box for that segment type
-static void find_inodes_for_segment_types(std::vector<SampleGrid>* inodes_for_segment) {
+static std::vector<SampleRegion> find_sample_regions(int num_segments) {
+ vtr::ScopedStartFinishTimer timer("finding sample regions");
+ std::vector<SampleRegion> sample_regions;
auto& device_ctx = g_vpr_ctx.device();
auto& rr_nodes = device_ctx.rr_nodes;
- const int num_segments = inodes_for_segment->size();
+ std::vector<vtr::Matrix<int>> segment_counts(num_segments);
// compute bounding boxes for each segment type
std::vector<vtr::Rect<int>> bounding_box_for_segment(num_segments, vtr::Rect<int>());
for (size_t i = 0; i < rr_nodes.size(); i++) {
auto& node = rr_nodes[i];
if (node.type() != CHANX && node.type() != CHANY) continue;
+ if (node.capacity() == 0 || node.num_edges() == 0) continue;
int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index;
VTR_ASSERT(seg_index != OPEN);
VTR_ASSERT(seg_index < num_segments);
- bounding_box_for_segment[seg_index] |= bounding_box_for_node(node);
+ bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(device_ctx.connection_boxes, i));
}
- // select an inode near the center of the bounding box for each segment type
- inodes_for_segment->clear();
- inodes_for_segment->resize(num_segments);
- for (auto& grid : *inodes_for_segment) {
- for (int y = 0; y < SAMPLE_GRID_SIZE; y++) {
- for (int x = 0; x < SAMPLE_GRID_SIZE; x++) {
- grid.point[y][x].samples = std::vector<ssize_t>();
- }
- }
+ // initialize counts
+ for (int seg = 0; seg < num_segments; seg++) {
+ const auto& box = bounding_box_for_segment[seg];
+ segment_counts[seg] = vtr::Matrix<int>({size_t(box.width()), size_t(box.height())}, 0);
}
+ // count sample points
for (size_t i = 0; i < rr_nodes.size(); i++) {
auto& node = rr_nodes[i];
- vtr::Rect<int> node_bounds = bounding_box_for_node(node);
if (node.type() != CHANX && node.type() != CHANY) continue;
- if (node.capacity() == 0 || device_ctx.connection_boxes.find_canonical_loc(i) == nullptr) continue;
+ if (node.capacity() == 0 || node.num_edges() == 0) continue;
+ const std::pair<size_t, size_t>* loc = device_ctx.connection_boxes.find_canonical_loc(i);
+ if (loc == nullptr) continue;
int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index;
+ segment_counts[seg_index][loc->first][loc->second] += 1;
VTR_ASSERT(seg_index != OPEN);
VTR_ASSERT(seg_index < num_segments);
+ }
- auto& grid = (*inodes_for_segment)[seg_index];
- for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) {
- for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) {
- auto& point = grid.point[sy][sx];
- if (point.samples.empty()) {
- point.samples.push_back(i);
- point.location = vtr::Point<int>(node.xlow(), node.ylow());
- goto next_rr_node;
+ // select sample points
+ for (int i = 0; i < num_segments; i++) {
+ const auto& counts = segment_counts[i];
+ const auto& bounding_box = bounding_box_for_segment[i];
+ if (bounding_box.empty()) continue;
+ for (int y = 0; y < SAMPLE_GRID_SIZE; y++) {
+ for (int x = 0; x < SAMPLE_GRID_SIZE; x++) {
+ vtr::Rect<int> window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE);
+ auto histogram = count_histogram(window, segment_counts[i]);
+ SampleRegion region = {
+ /* .segment_type = */ i,
+ /* .grid_location = */ vtr::Point<int>(x, y),
+ /* .points = */ choose_points(counts, window, quantile(histogram, kSamplingCountLowerQuantile), quantile(histogram, kSamplingCountUpperQuantile)),
+ /* .order = */ 0};
+ if (!region.points.empty()) {
+ /* In order to improve caching, the list of sample points are
+ * sorted to keep points that are nearby on the Euclidean plane also
+ * nearby in the vector of sample points.
+ *
+ * This means subsequent expansions on the same thread are likely
+ * to cover a similar set of nodes, so they are more likely to be
+ * cached. This improves performance by about 7%, which isn't a lot,
+ * but not a bad improvement for a few lines of code. */
+ vtr::Point<int> location = region.points[0].location;
+
+ // interleave bits of X and Y for a Z-curve ordering.
+ region.order = interleave(location.x()) | (interleave(location.y()) << 1);
+
+ sample_regions.push_back(region);
}
+ }
+ }
+ }
- if (node_bounds.contains(point.location)) {
- point.samples.push_back(i);
- goto next_rr_node;
- }
+ // sort regions
+ std::sort(sample_regions.begin(), sample_regions.end(),
+ [](const SampleRegion& a, const SampleRegion& b) {
+ return a.order < b.order;
+ });
- vtr::Point<int> target = sample(bounding_box_for_segment[seg_index], sx + 1, sy + 1, SAMPLE_GRID_SIZE + 1);
- int distance_new = manhattan_distance(point_for_node(node), target);
- int distance_stored = manhattan_distance(point.location, target);
- if (distance_new < distance_stored) {
- point.samples.clear();
- point.samples.push_back(i);
- goto next_rr_node;
- }
- }
+ // build an index of sample points on segment type and location
+ std::map<std::tuple<int, int, int>, SamplePoint*> sample_point_index;
+ for (auto& region : sample_regions) {
+ for (auto& point : region.points) {
+ sample_point_index[std::make_tuple(region.segment_type, point.location.x(), point.location.y())] = &point;
}
+ }
+
+ // collect the node indices for each segment type at the selected sample points
+ for (size_t i = 0; i < rr_nodes.size(); i++) {
+ auto& node = rr_nodes[i];
+ if (node.type() != CHANX && node.type() != CHANY) continue;
+ if (node.capacity() == 0 || node.num_edges() == 0) continue;
+ const std::pair<size_t, size_t>* loc = device_ctx.connection_boxes.find_canonical_loc(i);
+ if (loc == nullptr) continue;
+
+ int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index;
+
+ VTR_ASSERT(seg_index != OPEN);
+ VTR_ASSERT(seg_index < num_segments);
- // to break out from the inner loop
- next_rr_node:
- continue;
+ auto point = sample_point_index.find(std::make_tuple(seg_index, loc->first, loc->second));
+ if (point != sample_point_index.end()) {
+ point->second->nodes.push_back(i);
+ }
}
+
+ return sample_regions;
}
#ifndef VTR_ENABLE_CAPNPROTO
@@ -893,6 +1011,7 @@ void ConnectionBoxMapLookahead::write(const std::string& file) const {
static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) {
out->delay = in.getDelay();
out->congestion = in.getCongestion();
+ out->fill = in.getFill();
}
static void FromCostEntry(VprCostEntry::Builder* out, const util::Cost_Entry& in) {
@@ -922,6 +1041,16 @@ static void FromMatrixCostEntry(
out, in, FromCostEntry);
}
+static void ToFloat(float* out, const VprFloatEntry::Reader& in) {
+ // Getting a scalar field is always "get<field name>()".
+ *out = in.getValue();
+}
+
+static void FromFloat(VprFloatEntry::Builder* out, const float& in) {
+ // Setting a scalar field is always "set<field name>(value)".
+ out->setValue(in);
+}
+
void CostMap::read(const std::string& file) {
MmapFile f(file);
@@ -952,6 +1081,12 @@ void CostMap::read(const std::string& file) {
ToNdMatrix<2, Matrix<VprCostEntry>, vtr::NdMatrix<util::Cost_Entry, 2>>(
&cost_map_, cost_maps, ToMatrixCostEntry);
}
+
+ {
+ const auto& penalty = cost_map.getPenalty();
+ ToNdMatrix<2, VprFloatEntry, float>(
+ &penalty_, penalty, ToFloat);
+ }
}
void CostMap::write(const std::string& file) const {
@@ -978,6 +1113,12 @@ void CostMap::write(const std::string& file) const {
&cost_maps, cost_map_, FromMatrixCostEntry);
}
+ {
+ auto penalty = cost_map.initPenalty();
+ FromNdMatrix<2, VprFloatEntry, float>(
+ &penalty, penalty_, FromFloat);
+ }
+
writeMessageToFile(file, &builder);
}
#endif
diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h
index 960ef84f2..70423409f 100644
--- a/vpr/src/route/connection_box_lookahead_map.h
+++ b/vpr/src/route/connection_box_lookahead_map.h
@@ -32,54 +32,27 @@ struct RoutingCostKey {
}
};
-// compressed version of RoutingCostKey
-// TODO add bounds checks
-struct CompressedRoutingCostKey {
- uint32_t data;
-
- CompressedRoutingCostKey() {
- data = -1;
- }
- CompressedRoutingCostKey(const RoutingCostKey& key) {
- data = key.seg_index & 0xff;
- data <<= 8;
- data |= size_t(key.box_id) & 0xff;
- data <<= 8;
- data |= key.delta.x() & 0xff;
- data <<= 8;
- data |= key.delta.y() & 0xff;
- }
-
- bool operator==(CompressedRoutingCostKey other) const {
- return data == other.data;
- }
-};
-
// hash implementation for RoutingCostKey
struct HashRoutingCostKey {
std::size_t operator()(RoutingCostKey const& key) const noexcept {
- uint64_t data;
- data = key.seg_index & 0xffff;
- data <<= 16;
- data |= size_t(key.box_id) & 0xffff;
- data <<= 16;
- data |= key.delta.x() & 0xffff;
- data <<= 16;
- data |= key.delta.y() & 0xffff;
- return std::hash<uint64_t>{}(data);
+ std::size_t hash = std::hash<int>{}(key.seg_index);
+ vtr::hash_combine(hash, key.box_id);
+ vtr::hash_combine(hash, key.delta.x());
+ vtr::hash_combine(hash, key.delta.y());
+ return hash;
}
};
// Map used to store intermediate routing costs
-typedef std::unordered_map<RoutingCostKey, util::Cost_Entry, HashRoutingCostKey> RoutingCosts;
+typedef std::unordered_map<RoutingCostKey, float, HashRoutingCostKey> RoutingCosts;
// Dense cost maps per source segment and destination connection box types
class CostMap {
public:
void set_counts(size_t seg_count, size_t box_count);
int node_to_segment(int from_node_ind) const;
- util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y, int* out_of_bounds_val) const;
- void set_cost_map(const RoutingCosts& costs);
+ util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const;
+ void set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs);
std::pair<util::Cost_Entry, int> get_nearby_cost_entry(const vtr::NdMatrix<util::Cost_Entry, 2>& matrix, int cx, int cy, const vtr::Rect<int>& bounds);
void read(const std::string& file);
void write(const std::string& file) const;
@@ -87,8 +60,9 @@ class CostMap {
std::vector<std::pair<int, int>> list_empty() const;
private:
- vtr::NdMatrix<vtr::NdMatrix<util::Cost_Entry, 2>, 2> cost_map_;
- vtr::NdMatrix<std::pair<int, int>, 2> offset_;
+ vtr::Matrix<vtr::Matrix<util::Cost_Entry>> cost_map_;
+ vtr::Matrix<std::pair<int, int>> offset_;
+ vtr::Matrix<float> penalty_;
std::vector<int> segment_map_;
size_t seg_count_;
size_t box_count_;
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index 6226662e7..081d62760 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -5,7 +5,9 @@
#include "vtr_math.h"
#include "route_common.h"
-util::PQ_Entry::PQ_Entry(
+namespace util {
+
+PQ_Entry::PQ_Entry(
int set_rr_node_ind,
int switch_ind,
float parent_delay,
@@ -53,13 +55,11 @@ util::PQ_Entry::PQ_Entry(
util::PQ_Entry_Delay::PQ_Entry_Delay(
int set_rr_node_ind,
int switch_ind,
- float parent_delay,
- bool starting_node) {
+ const util::PQ_Entry_Delay* parent) {
this->rr_node_ind = set_rr_node_ind;
- auto& device_ctx = g_vpr_ctx.device();
- this->delay_cost = parent_delay;
- if (!starting_node) {
+ if (parent != nullptr) {
+ auto& device_ctx = g_vpr_ctx.device();
float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel;
float Rsw = device_ctx.rr_switch_inf[switch_ind].R;
float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C();
@@ -73,23 +73,27 @@ util::PQ_Entry_Delay::PQ_Entry_Delay(
}
VTR_ASSERT(T_linear >= 0.);
- this->delay_cost += T_linear;
+ this->delay_cost = parent->delay_cost + T_linear;
+ } else {
+ this->delay_cost = 0.f;
}
}
util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost(
int set_rr_node_ind,
int switch_ind,
- float upstream_base_costs,
- bool starting_node) {
+ const util::PQ_Entry_Base_Cost* parent) {
this->rr_node_ind = set_rr_node_ind;
- auto& device_ctx = g_vpr_ctx.device();
- this->base_cost = upstream_base_costs;
- if (!starting_node) {
+ if (parent != nullptr) {
+ auto& device_ctx = g_vpr_ctx.device();
if (device_ctx.rr_switch_inf[switch_ind].configurable()) {
- this->base_cost += get_rr_cong_cost(set_rr_node_ind);
+ this->base_cost = parent->base_cost + get_rr_cong_cost(set_rr_node_ind);
+ } else {
+ this->base_cost = parent->base_cost;
}
+ } else {
+ this->base_cost = 0.f;
}
}
@@ -188,3 +192,59 @@ util::Cost_Entry util::Expansion_Cost_Entry::get_median_entry() const {
return representative_entry;
}
+
+template<typename Entry>
+void expand_dijkstra_neighbours(const std::vector<t_rr_node>& rr_nodes,
+ const Entry& parent_entry,
+ std::unordered_map<int, util::Search_Path>& paths,
+ std::vector<bool>& node_expanded,
+ std::priority_queue<Entry,
+ std::vector<Entry>,
+ std::greater<Entry>>& pq) {
+ int parent_ind = parent_entry.rr_node_ind;
+
+ auto& parent_node = rr_nodes[parent_ind];
+
+ for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) {
+ int child_node_ind = parent_node.edge_sink_node(iedge);
+ int switch_ind = parent_node.edge_switch(iedge);
+
+ /* skip this child if it has already been expanded from */
+ if (node_expanded[child_node_ind]) {
+ continue;
+ }
+
+ Entry child_entry(child_node_ind, switch_ind, &parent_entry);
+ VTR_ASSERT(child_entry.cost() >= 0);
+ pq.push(child_entry);
+
+ /* Create (if it doesn't exist) or update (if the new cost is lower)
+ * to specified node */
+ Search_Path path_entry = {child_entry.cost(), parent_ind, iedge};
+ auto result = paths.insert(std::make_pair(
+ child_node_ind,
+ path_entry));
+ if (!result.second) {
+ if (child_entry.cost() < result.first->second.cost) {
+ result.first->second = path_entry;
+ }
+ }
+ }
+}
+
+template void expand_dijkstra_neighbours(const std::vector<t_rr_node>& rr_nodes,
+ const PQ_Entry_Delay& parent_entry,
+ std::unordered_map<int, Search_Path>& paths,
+ std::vector<bool>& node_expanded,
+ std::priority_queue<PQ_Entry_Delay,
+ std::vector<PQ_Entry_Delay>,
+ std::greater<PQ_Entry_Delay>>& pq);
+template void expand_dijkstra_neighbours(const std::vector<t_rr_node>& rr_nodes,
+ const PQ_Entry_Base_Cost& parent_entry,
+ std::unordered_map<int, Search_Path>& paths,
+ std::vector<bool>& node_expanded,
+ std::priority_queue<PQ_Entry_Base_Cost,
+ std::vector<PQ_Entry_Base_Cost>,
+ std::greater<PQ_Entry_Base_Cost>>& pq);
+
+} // namespace util
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index 0b39c434f..36e6ea510 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -49,12 +49,14 @@ class Cost_Entry {
congestion = std::numeric_limits<float>::infinity();
fill = false;
}
- Cost_Entry(float set_delay, float set_congestion) {
- delay = set_delay;
- congestion = set_congestion;
- fill = false;
- }
-
+ Cost_Entry(float set_delay, float set_congestion)
+ : delay(set_delay)
+ , congestion(set_congestion)
+ , fill(false) {}
+ Cost_Entry(float set_delay, float set_congestion, bool set_fill)
+ : delay(set_delay)
+ , congestion(set_congestion)
+ , fill(set_fill) {}
bool valid() const {
return std::isfinite(delay) && std::isfinite(congestion);
}
@@ -141,34 +143,43 @@ class PQ_Entry {
}
};
-// A version of PQ_Entry that only calculates and stores the delay (cost.)
+// A version of PQ_Entry that only calculates and stores the delay.
class PQ_Entry_Delay {
public:
int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents
float delay_cost; //the cost of the path to get to this node
- PQ_Entry_Delay(int set_rr_node_ind, int /*switch_ind*/, float parent_cost, bool starting_node);
+ PQ_Entry_Delay(int set_rr_node_ind, int /*switch_ind*/, const PQ_Entry_Delay* parent);
float cost() const {
return delay_cost;
}
+ void adjust_Tsw(float amount) {
+ delay_cost += amount;
+ }
+
bool operator>(const PQ_Entry_Delay& obj) const {
return (this->delay_cost > obj.delay_cost);
}
};
+// A version of PQ_Entry that only calculates and stores the base cost.
class PQ_Entry_Base_Cost {
public:
int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents
float base_cost;
- PQ_Entry_Base_Cost(int set_rr_node_ind, int /*switch_ind*/, float parent_cost, bool starting_node);
+ PQ_Entry_Base_Cost(int set_rr_node_ind, int /*switch_ind*/, const PQ_Entry_Base_Cost* parent);
float cost() const {
return base_cost;
}
+ void adjust_Tsw(float /* amount */) {
+ // do nothing
+ }
+
bool operator>(const PQ_Entry_Base_Cost& obj) const {
return (this->base_cost > obj.base_cost);
}
@@ -180,46 +191,16 @@ struct Search_Path {
int edge;
};
-} // namespace util
-
/* iterates over the children of the specified node and selectively pushes them onto the priority queue */
template<typename Entry>
void expand_dijkstra_neighbours(const std::vector<t_rr_node>& rr_nodes,
const Entry& parent_entry,
- std::unordered_map<int, util::Search_Path>& paths,
+ std::unordered_map<int, Search_Path>& paths,
std::vector<bool>& node_expanded,
std::priority_queue<Entry,
std::vector<Entry>,
- std::greater<Entry>>& pq) {
- int parent_ind = parent_entry.rr_node_ind;
+ std::greater<Entry>>& pq);
- auto& parent_node = rr_nodes[parent_ind];
-
- for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) {
- int child_node_ind = parent_node.edge_sink_node(iedge);
- int switch_ind = parent_node.edge_switch(iedge);
-
- /* skip this child if it has already been expanded from */
- if (node_expanded[child_node_ind]) {
- continue;
- }
-
- Entry child_entry(child_node_ind, switch_ind, parent_entry.cost(), false);
- VTR_ASSERT(child_entry.cost() >= 0);
- pq.push(child_entry);
-
- /* Create (if it doesn't exist) or update (if the new cost is lower)
- * to specified node */
- util::Search_Path path_entry = {child_entry.cost(), parent_ind, iedge};
- auto result = paths.insert(std::make_pair(
- child_node_ind,
- path_entry));
- if (!result.second) {
- if (child_entry.cost() < result.first->second.cost) {
- result.first->second = path_entry;
- }
- }
- }
-}
+} // namespace util
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment