-
-
Save VisualMelon/cb057316dfd5e00858dc6cab250ef138 to your computer and use it in GitHub Desktop.
using Microsoft.VisualStudio.TestTools.UnitTesting; | |
using Priority_Queue; | |
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.Threading.Tasks; | |
namespace CodeReview.AnotherAStar | |
{ | |
public class SharedFunctions | |
{ | |
public static float GetDistanceFromLatLonInMeters(float lat1, float long1, float lat2, float long2) | |
{ | |
// I am not a geographer | |
float dlat = lat1 - lat2; | |
float dlong = lat2 - long2; | |
return (float)Math.Sqrt(dlat * dlat + dlong * dlong); | |
} | |
} | |
// I have no idea what a NodeRow is... so these are just Nodes | |
// are your nodes structs? I am confused why you use the Id everywhere... (if they are classes, then you can probably drop all the nodeId stuff: it just adds unhelpful indirection) | |
public class NodeRow | |
{ | |
public NodeRow(string type, string nodeId, float lattitude, float longitude) | |
{ | |
NodeId = nodeId; | |
Lattitude = lattitude; | |
Longitude = longitude; | |
} | |
public string Type { get; } | |
public string NodeId { get; } | |
public float Lattitude { get; } | |
public float Longitude { get; } | |
private readonly List<NodeEdgeRow> _edges = new List<NodeEdgeRow>(); | |
public IReadOnlyList<NodeEdgeRow> Edges => _edges; | |
/// <summary> | |
/// Connects this node to another with a two-way edge | |
/// </summary> | |
public void ConnectWith(NodeRow other, float weight) | |
{ | |
// enforce contracts on the topology | |
if (other == null) | |
throw new ArgumentNullException(nameof(other)); | |
if (other == this) | |
throw new ArgumentException("Cannot connect a node to itself", nameof(other)); | |
var edge = new NodeEdgeRow(this, other, weight); | |
_edges.Add(edge); | |
other._edges.Add(edge); | |
} | |
// much nicer | |
public bool IsCorridor => Type == "C" || Type == "c"; | |
public float DistanceInMetersTo(NodeRow other) | |
{ | |
// it is really easy to mess calls like this up: hide them behind a logical 'business' method | |
return SharedFunctions.GetDistanceFromLatLonInMeters(Lattitude, Longitude, other.Lattitude, other.Longitude); | |
} | |
} | |
// now idea what node-edge row is either, so here is just an Edge | |
/// <summary> | |
/// A two-way edge, to be shared between connected Nodes (connected nodes are unorded) | |
/// </summary> | |
public class NodeEdgeRow | |
{ | |
public NodeEdgeRow(NodeRow node1, NodeRow node2, float weight) | |
{ | |
// really important assumptions: document and enforce them | |
if (node1 == node2) | |
throw new ArgumentException("Cannot connect a node to itself"); | |
if (weight < 0) | |
throw new ArgumentException("Edge weight cannot be less than 0"); | |
Node1 = node1; | |
Node2 = node2; | |
Weight = weight; | |
} | |
public NodeRow Node1 { get; } | |
public NodeRow Node2 { get; } | |
public float Weight { get; } | |
public bool TryGetOther(NodeRow known, out NodeRow other) | |
{ | |
if (known.NodeId == Node1.NodeId) | |
{ | |
other = Node2; | |
return true; | |
} | |
else if (known.NodeId == Node2.NodeId) | |
{ | |
other = Node1; | |
return true; | |
} | |
else | |
{ | |
other = null; | |
return false; | |
} | |
} | |
} | |
public class Route | |
{ | |
public Route(float totalCost, IReadOnlyList<NodeRow> nodes) | |
{ | |
TotalCost = totalCost; | |
Nodes = nodes; | |
} | |
public float TotalCost { get; } | |
public IReadOnlyList<NodeRow> Nodes { get; } | |
} | |
public class AStar | |
{ | |
/// <summary> | |
/// Attempts to route a path from start to end through the given nodes | |
/// (Assumes that all nodes in the graph are found in allNodes) | |
/// </summary> | |
// If you can stop using NodeIds everywhere, you can ditch allNodes, and just pass the nodes to this method | |
public static Route BuildAStarOptimisation2(Dictionary<string, NodeRow> allNodes, string start, string end) | |
{ | |
if (!allNodes.ContainsKey(start)) | |
throw new ArgumentException("Start node is not present in allNodes"); | |
if (!allNodes.ContainsKey(end)) | |
throw new ArgumentException("End node is not present in allNodes"); | |
if (start == end) | |
{ // job done | |
return new Route(0f, new NodeRow[0]); | |
} | |
// lookup start and end, and find endCorridor | |
NodeRow startNode = allNodes[start]; | |
NodeRow endNode = allNodes[end]; | |
var endCorridor = FindNextCorridorNode(endNode); | |
// frontier priority queue | |
FastPriorityQueue<AStarNode> frontier = new FastPriorityQueue<AStarNode>(allNodes.Count); | |
// mapping between NodeId and AStarNode | |
Dictionary<string, AStarNode> queueTable = new Dictionary<string, AStarNode>(); | |
float heuristicFunction(NodeRow node) => node.IsCorridor ? node.DistanceInMetersTo(endCorridor) : 0f; // this heuristic no sense | |
// enque the first node... | |
var starter = new AStarNode(startNode, null, 0f); | |
queueTable.Add(startNode.NodeId, starter); | |
frontier.Enqueue(starter, heuristicFunction(startNode)); | |
// ... and off we go | |
while (frontier.Count > 0) | |
{ | |
AStarNode current = frontier.Dequeue(); | |
float currentCost = current.TrueCost; | |
if (current.Node.NodeId == endCorridor.NodeId) | |
{ | |
// route found: finish | |
return new Route(currentCost, BackTrack(current)); | |
} | |
current.MarkVisited(); | |
foreach (var edge in current.Node.Edges) | |
{ | |
if (!edge.TryGetOther(current.Node, out NodeRow edgeEnd)) | |
throw new Exception("Invalid topology: <insert descriptive exception message here so that someone who has to read it has a hope of debugging it without looking at your code"); | |
// find the node correspodning to edgeEnd if it already exists | |
bool edgeEndAlreadyPresent = queueTable.TryGetValue(edgeEnd.NodeId, out var target); | |
bool alreadyVisitedEdgeEnd = edgeEndAlreadyPresent && target.Visited; | |
if (alreadyVisitedEdgeEnd) | |
continue; // under assumptions, we can't perform better than this | |
float trueCost = currentCost + edge.Weight; | |
float heuristicCost = heuristicFunction(edgeEnd); | |
float combinedCost = trueCost + heuristicCost; | |
if (edgeEndAlreadyPresent) | |
{ | |
// only use our new route if it is better | |
if (combinedCost < target.TrueCost) | |
{ | |
target.Update(current, trueCost); | |
frontier.UpdatePriority(target, combinedCost); | |
} | |
} | |
else | |
{ | |
target = new AStarNode(edgeEnd, current, trueCost); | |
queueTable.Add(edgeEnd.NodeId, target); | |
frontier.Enqueue(target, combinedCost); | |
} | |
} | |
} | |
return null; | |
} | |
private static IReadOnlyList<NodeRow> BackTrack(AStarNode lastNode) | |
{ | |
List<NodeRow> route = new List<NodeRow>(); | |
while (lastNode != null) | |
{ | |
route.Add(lastNode.Node); | |
lastNode = lastNode.LeadingNode; | |
} | |
route.Reverse(); | |
return route; | |
} | |
// this can be static: make it so (it communicates that it can't be changing any state) | |
private static NodeRow FindNextCorridorNode(NodeRow node) | |
{ | |
var provisional = TryFindNextCorridorNode(node, new HashSet<string>()); | |
if (provisional != null) | |
{ | |
return provisional; | |
} | |
else | |
{ | |
throw new Exception("Invalid Graph"); // this should have a better message | |
} | |
} | |
// I'm guessing at what this is meant to do | |
private static NodeRow TryFindNextCorridorNode(NodeRow node, HashSet<string> seen) | |
{ | |
if (!seen.Add(node.NodeId)) | |
return null; | |
if (node.IsCorridor) | |
{ | |
return node; | |
} | |
foreach (NodeEdgeRow edge in node.Edges) | |
{ | |
if (edge.TryGetOther(node, out var other)) | |
{ | |
return other; | |
} | |
else | |
{ | |
throw new Exception("Invalid toplogy, etc. etc."); | |
} | |
} | |
foreach (NodeEdgeRow edge in node.Edges) | |
{ | |
var provisional = FindNextCorridorNode(edge.Node2); | |
if (provisional != null) | |
return provisional; | |
} | |
return null; | |
} | |
private class AStarNode : FastPriorityQueueNode | |
{ | |
/// <summary> | |
/// The Node wherefor this AStarNode is responsible | |
/// </summary> | |
public NodeRow Node { get; } // immutability is usually a good thing | |
/// <summary> | |
/// The node which leads to this node | |
/// null if there is no leading node | |
/// </summary> | |
public AStarNode LeadingNode { get; private set; } | |
/// <summary> | |
/// The total cost of the route up to this Node | |
/// </summary> | |
public float TrueCost { get; private set; } | |
/// <summary> | |
/// Indicates whether this node has been visited yet | |
/// </summary> | |
public bool Visited { get; private set; } | |
/// <summary> | |
/// Initiaislises an un-visited AStarNode | |
/// </summary> | |
public AStarNode(NodeRow node, AStarNode leadingNode, float trueCost) | |
{ | |
Node = node; | |
LeadingNode = leadingNode; | |
TrueCost = trueCost; | |
Visited = false; | |
} | |
public void MarkVisited() | |
{ | |
System.Diagnostics.Debug.Assert(!Visited); | |
Visited = true; | |
} | |
public void Update(AStarNode newLeadingNode, float newTrueCost) | |
{ | |
System.Diagnostics.Debug.Assert(newTrueCost < TrueCost); | |
LeadingNode = newLeadingNode; | |
TrueCost = newTrueCost; | |
} | |
} | |
} | |
[TestClass] | |
public class AStarTest | |
{ | |
[TestMethod] | |
public void ExampleTest() | |
{ | |
List<NodeRow> nodes = new List<NodeRow>(); | |
NodeRow newNode(string type, string nodeId, float lat, float @long) | |
{ | |
var node = new NodeRow(type, nodeId, lat, @long); | |
nodes.Add(node); | |
return node; | |
} | |
var startNode = newNode("c", "s", 0f, 0f); | |
var endNode = newNode("notc", "en", 1000f, 0f); | |
var endCorridor = newNode("C", "e", 8000f, 0f); | |
endNode.ConnectWith(endCorridor, 0); // NOTE: the implementation doesn't consider this weight | |
var a = newNode("c", "a", 400, 300); | |
var b = newNode("c", "b", 400, 0); | |
var c = newNode("c", "c", 400, -300); | |
var s = startNode; | |
var e = endCorridor; | |
// contrived map: best option is s->a->b->e | |
int five = 500; | |
s.ConnectWith(a, five); | |
s.ConnectWith(b, 400 + 1000); | |
s.ConnectWith(c, five + 1000); | |
a.ConnectWith(b, 300); | |
b.ConnectWith(c, 300); | |
e.ConnectWith(a, five + 1000); | |
e.ConnectWith(b, 400); | |
e.ConnectWith(c, five + 1000); | |
float optimalRoute = five + 400 + 300; | |
Dictionary<string, NodeRow> allNodes = nodes.ToDictionary(node => node.NodeId, node => node); | |
var route = AStar.BuildAStarOptimisation2(allNodes, startNode.NodeId, endNode.NodeId); | |
Assert.IsNotNull(route, "Route should not be null"); | |
Assert.IsNotNull(route.Nodes, "Routes nodes should not be null"); | |
Assert.AreEqual(optimalRoute, route.TotalCost, 0.001, "Route length was outside tollerances"); | |
Assert.AreEqual(MakeRouteString(s, a, b, e), MakeRouteString(route.Nodes), false, "Route nodes incorrect"); | |
} | |
private static string MakeRouteString(params NodeRow[] path) => MakeRouteString((IReadOnlyList<NodeRow>)path); | |
private static string MakeRouteString(IReadOnlyList<NodeRow> path) | |
{ | |
return string.Join("->", path.Select(n => n.NodeId)); | |
} | |
} | |
} |
I will try implementing something from this code.
In the mean time, to try and answer some of your questions and explain the structure:
The graph is made up of nodes which can be represented by a NodeRow
object. Named as such because it links into an EF database.
The graph is technically a directed weighted graph, however, all edges are traversable in both directions.
A node has a bunch of properties but the only ones that are important for this are:
nodeId
- A unique ID for this node, guaranteed to be unique by other validation in the system.Latitude
andLongitude
Edges
- A List ofNodeEdgeRow
objects, once again from the database.
A NodeEdgeRow
object connects two NodeRow
objects, Node1
, Node2
and has a cost Weight
Basically speaking, the graph consists of Corridor Nodes and Non-Corridor nodes. A corridor node has a latitude and longitude, a non-corridor node does not. The weight of an edge connecting two corridor nodes is equal to the distance in meters between the two nodes' lat/lon points. The weight of an edge connecting a corridor to a non-corridor or two non-corridors is equal to a large integer, used to ensure that non-corridors are only ever part of a route when absolutely necessary. This large integer is usually 999,999
but may be slightly lower depending on the priority that this non-corridor should take over others.
Therefore as far as I can see using a small constant as a heuristic cost when the distance cannot be calculated is admissible because the actual cost will always be much higher. It may be that a different constant value gives better routes but I don't know enough to work that out. Either way, I can't think of a different way of providing an admissible heuristic without knowing the distance when everything else is based on distance, at least not without significant performance impacts.
If I'm understanding correctly, a non-Corridor node will have edges with (very) high weights? Even so, that still means the heurisitic can be an underestimate of itself (not-monotone) which is a problem. Simply using the same distance metric for all nodes would resolve this issue (assuming it is meaningful in your domain). Is your concern that computing the heuristic is expensive? (missed the bit about non-corridor nodes not having a lat/long)
(Note: if your heurisitc is not admissible, then A* won't necessarily find the optimal route, but it can still give you a good estimate (if it's close enough), so maybe ignoring this is fine for your usecase)
Thanks for your help! It looks like I've got A* working from your code above, and from a couple of test runs it's about 100x faster than the previous algorithm, which is nice.
I'll do some testing comparing the generated routes against routes generated by the previous Dijkstra's implementation to see how good the routes are in terms of optimality.
IIRC the lowest value of the very high weighted edges is 88888 and the highest is 999999, would setting the heuristic to somewhere in the range when a lat/lon distance cannot be calculated be better than using 0?
Do you know of any decent in detail explanations of A* with Priority Queues? Would be nice to understand what I've just written a little more than I do currently. I'm pretty sure I understand the general concept of Dijkstra's and A* but some of what is being done in your code I have no idea why it is like that.
I couldn't recommend any reading in particular... I've just sort of being writing A* for years (I pretty much just throw heuristic search at everything and hope it's 'good enough'). I'm not an expert on the subject, but the real complexity is in the priority queue implementation.
Which bits don't make sense? There is a reasonable chance I'm just doing something daft.
(Just spotted a blatant error: missing queueTable.Add(startNode.NodeId, starter);
- probably a non-issue, but not good)
This should convert to Dijkstra's by taking out the heuristic stuff, right?
Indeed, if you replace the heuristic function with return 0
then it is (should be) equivalent to a Dijkstra's.
Notes:
Visited
,LeadingMap
, andExistingCosts
with state inAStarNode
. This might not be the best thing to do, but it's certainly much tidier than the alternative