Skip to content

Instantly share code, notes, and snippets.

@VisualMelon
Last active September 7, 2018 13:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save VisualMelon/cb057316dfd5e00858dc6cab250ef138 to your computer and use it in GitHub Desktop.
Save VisualMelon/cb057316dfd5e00858dc6cab250ef138 to your computer and use it in GitHub Desktop.
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Priority_Queue;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace CodeReview.AnotherAStar
{
public class SharedFunctions
{
public static float GetDistanceFromLatLonInMeters(float lat1, float long1, float lat2, float long2)
{
// I am not a geographer
float dlat = lat1 - lat2;
float dlong = lat2 - long2;
return (float)Math.Sqrt(dlat * dlat + dlong * dlong);
}
}
// I have no idea what a NodeRow is... so these are just Nodes
// are your nodes structs? I am confused why you use the Id everywhere... (if they are classes, then you can probably drop all the nodeId stuff: it just adds unhelpful indirection)
public class NodeRow
{
public NodeRow(string type, string nodeId, float lattitude, float longitude)
{
NodeId = nodeId;
Lattitude = lattitude;
Longitude = longitude;
}
public string Type { get; }
public string NodeId { get; }
public float Lattitude { get; }
public float Longitude { get; }
private readonly List<NodeEdgeRow> _edges = new List<NodeEdgeRow>();
public IReadOnlyList<NodeEdgeRow> Edges => _edges;
/// <summary>
/// Connects this node to another with a two-way edge
/// </summary>
public void ConnectWith(NodeRow other, float weight)
{
// enforce contracts on the topology
if (other == null)
throw new ArgumentNullException(nameof(other));
if (other == this)
throw new ArgumentException("Cannot connect a node to itself", nameof(other));
var edge = new NodeEdgeRow(this, other, weight);
_edges.Add(edge);
other._edges.Add(edge);
}
// much nicer
public bool IsCorridor => Type == "C" || Type == "c";
public float DistanceInMetersTo(NodeRow other)
{
// it is really easy to mess calls like this up: hide them behind a logical 'business' method
return SharedFunctions.GetDistanceFromLatLonInMeters(Lattitude, Longitude, other.Lattitude, other.Longitude);
}
}
// now idea what node-edge row is either, so here is just an Edge
/// <summary>
/// A two-way edge, to be shared between connected Nodes (connected nodes are unorded)
/// </summary>
public class NodeEdgeRow
{
public NodeEdgeRow(NodeRow node1, NodeRow node2, float weight)
{
// really important assumptions: document and enforce them
if (node1 == node2)
throw new ArgumentException("Cannot connect a node to itself");
if (weight < 0)
throw new ArgumentException("Edge weight cannot be less than 0");
Node1 = node1;
Node2 = node2;
Weight = weight;
}
public NodeRow Node1 { get; }
public NodeRow Node2 { get; }
public float Weight { get; }
public bool TryGetOther(NodeRow known, out NodeRow other)
{
if (known.NodeId == Node1.NodeId)
{
other = Node2;
return true;
}
else if (known.NodeId == Node2.NodeId)
{
other = Node1;
return true;
}
else
{
other = null;
return false;
}
}
}
public class Route
{
public Route(float totalCost, IReadOnlyList<NodeRow> nodes)
{
TotalCost = totalCost;
Nodes = nodes;
}
public float TotalCost { get; }
public IReadOnlyList<NodeRow> Nodes { get; }
}
public class AStar
{
/// <summary>
/// Attempts to route a path from start to end through the given nodes
/// (Assumes that all nodes in the graph are found in allNodes)
/// </summary>
// If you can stop using NodeIds everywhere, you can ditch allNodes, and just pass the nodes to this method
public static Route BuildAStarOptimisation2(Dictionary<string, NodeRow> allNodes, string start, string end)
{
if (!allNodes.ContainsKey(start))
throw new ArgumentException("Start node is not present in allNodes");
if (!allNodes.ContainsKey(end))
throw new ArgumentException("End node is not present in allNodes");
if (start == end)
{ // job done
return new Route(0f, new NodeRow[0]);
}
// lookup start and end, and find endCorridor
NodeRow startNode = allNodes[start];
NodeRow endNode = allNodes[end];
var endCorridor = FindNextCorridorNode(endNode);
// frontier priority queue
FastPriorityQueue<AStarNode> frontier = new FastPriorityQueue<AStarNode>(allNodes.Count);
// mapping between NodeId and AStarNode
Dictionary<string, AStarNode> queueTable = new Dictionary<string, AStarNode>();
float heuristicFunction(NodeRow node) => node.IsCorridor ? node.DistanceInMetersTo(endCorridor) : 0f; // this heuristic no sense
// enque the first node...
var starter = new AStarNode(startNode, null, 0f);
queueTable.Add(startNode.NodeId, starter);
frontier.Enqueue(starter, heuristicFunction(startNode));
// ... and off we go
while (frontier.Count > 0)
{
AStarNode current = frontier.Dequeue();
float currentCost = current.TrueCost;
if (current.Node.NodeId == endCorridor.NodeId)
{
// route found: finish
return new Route(currentCost, BackTrack(current));
}
current.MarkVisited();
foreach (var edge in current.Node.Edges)
{
if (!edge.TryGetOther(current.Node, out NodeRow edgeEnd))
throw new Exception("Invalid topology: <insert descriptive exception message here so that someone who has to read it has a hope of debugging it without looking at your code");
// find the node correspodning to edgeEnd if it already exists
bool edgeEndAlreadyPresent = queueTable.TryGetValue(edgeEnd.NodeId, out var target);
bool alreadyVisitedEdgeEnd = edgeEndAlreadyPresent && target.Visited;
if (alreadyVisitedEdgeEnd)
continue; // under assumptions, we can't perform better than this
float trueCost = currentCost + edge.Weight;
float heuristicCost = heuristicFunction(edgeEnd);
float combinedCost = trueCost + heuristicCost;
if (edgeEndAlreadyPresent)
{
// only use our new route if it is better
if (combinedCost < target.TrueCost)
{
target.Update(current, trueCost);
frontier.UpdatePriority(target, combinedCost);
}
}
else
{
target = new AStarNode(edgeEnd, current, trueCost);
queueTable.Add(edgeEnd.NodeId, target);
frontier.Enqueue(target, combinedCost);
}
}
}
return null;
}
private static IReadOnlyList<NodeRow> BackTrack(AStarNode lastNode)
{
List<NodeRow> route = new List<NodeRow>();
while (lastNode != null)
{
route.Add(lastNode.Node);
lastNode = lastNode.LeadingNode;
}
route.Reverse();
return route;
}
// this can be static: make it so (it communicates that it can't be changing any state)
private static NodeRow FindNextCorridorNode(NodeRow node)
{
var provisional = TryFindNextCorridorNode(node, new HashSet<string>());
if (provisional != null)
{
return provisional;
}
else
{
throw new Exception("Invalid Graph"); // this should have a better message
}
}
// I'm guessing at what this is meant to do
private static NodeRow TryFindNextCorridorNode(NodeRow node, HashSet<string> seen)
{
if (!seen.Add(node.NodeId))
return null;
if (node.IsCorridor)
{
return node;
}
foreach (NodeEdgeRow edge in node.Edges)
{
if (edge.TryGetOther(node, out var other))
{
return other;
}
else
{
throw new Exception("Invalid toplogy, etc. etc.");
}
}
foreach (NodeEdgeRow edge in node.Edges)
{
var provisional = FindNextCorridorNode(edge.Node2);
if (provisional != null)
return provisional;
}
return null;
}
private class AStarNode : FastPriorityQueueNode
{
/// <summary>
/// The Node wherefor this AStarNode is responsible
/// </summary>
public NodeRow Node { get; } // immutability is usually a good thing
/// <summary>
/// The node which leads to this node
/// null if there is no leading node
/// </summary>
public AStarNode LeadingNode { get; private set; }
/// <summary>
/// The total cost of the route up to this Node
/// </summary>
public float TrueCost { get; private set; }
/// <summary>
/// Indicates whether this node has been visited yet
/// </summary>
public bool Visited { get; private set; }
/// <summary>
/// Initiaislises an un-visited AStarNode
/// </summary>
public AStarNode(NodeRow node, AStarNode leadingNode, float trueCost)
{
Node = node;
LeadingNode = leadingNode;
TrueCost = trueCost;
Visited = false;
}
public void MarkVisited()
{
System.Diagnostics.Debug.Assert(!Visited);
Visited = true;
}
public void Update(AStarNode newLeadingNode, float newTrueCost)
{
System.Diagnostics.Debug.Assert(newTrueCost < TrueCost);
LeadingNode = newLeadingNode;
TrueCost = newTrueCost;
}
}
}
[TestClass]
public class AStarTest
{
[TestMethod]
public void ExampleTest()
{
List<NodeRow> nodes = new List<NodeRow>();
NodeRow newNode(string type, string nodeId, float lat, float @long)
{
var node = new NodeRow(type, nodeId, lat, @long);
nodes.Add(node);
return node;
}
var startNode = newNode("c", "s", 0f, 0f);
var endNode = newNode("notc", "en", 1000f, 0f);
var endCorridor = newNode("C", "e", 8000f, 0f);
endNode.ConnectWith(endCorridor, 0); // NOTE: the implementation doesn't consider this weight
var a = newNode("c", "a", 400, 300);
var b = newNode("c", "b", 400, 0);
var c = newNode("c", "c", 400, -300);
var s = startNode;
var e = endCorridor;
// contrived map: best option is s->a->b->e
int five = 500;
s.ConnectWith(a, five);
s.ConnectWith(b, 400 + 1000);
s.ConnectWith(c, five + 1000);
a.ConnectWith(b, 300);
b.ConnectWith(c, 300);
e.ConnectWith(a, five + 1000);
e.ConnectWith(b, 400);
e.ConnectWith(c, five + 1000);
float optimalRoute = five + 400 + 300;
Dictionary<string, NodeRow> allNodes = nodes.ToDictionary(node => node.NodeId, node => node);
var route = AStar.BuildAStarOptimisation2(allNodes, startNode.NodeId, endNode.NodeId);
Assert.IsNotNull(route, "Route should not be null");
Assert.IsNotNull(route.Nodes, "Routes nodes should not be null");
Assert.AreEqual(optimalRoute, route.TotalCost, 0.001, "Route length was outside tollerances");
Assert.AreEqual(MakeRouteString(s, a, b, e), MakeRouteString(route.Nodes), false, "Route nodes incorrect");
}
private static string MakeRouteString(params NodeRow[] path) => MakeRouteString((IReadOnlyList<NodeRow>)path);
private static string MakeRouteString(IReadOnlyList<NodeRow> path)
{
return string.Join("->", path.Select(n => n.NodeId));
}
}
}
@VisualMelon
Copy link
Author

VisualMelon commented Sep 6, 2018

Notes:

  • Anything to do with Corridors was made up on the spot, and is probably wrong
  • I've replaced Visited, LeadingMap, and ExistingCosts with state in AStarNode. This might not be the best thing to do, but it's certainly much tidier than the alternative
  • The distance calcuilation is just euclidian space, nothing proper
  • I changed the endNode check to look for endCorridor: this is because endNode might not be a corricor, and things that arn't corridors are problems (see below)
  • Your heurisitic makes no sense if any of the nodes are not corridors: for A* to produce an optimal result, your heurisitc MUST be admissible, meanining that it never over-estimates the cost of the route (or indeed itself). Using a constant cost under some circumstances makes zero sense.
  • I don't know what NodeRow or NodeEdgeRow look like, so I've just kept the names and made them single nodes and edges

@JackSteel97
Copy link

I will try implementing something from this code.
In the mean time, to try and answer some of your questions and explain the structure:

The graph is made up of nodes which can be represented by a NodeRow object. Named as such because it links into an EF database.
The graph is technically a directed weighted graph, however, all edges are traversable in both directions.
A node has a bunch of properties but the only ones that are important for this are:

  • nodeId - A unique ID for this node, guaranteed to be unique by other validation in the system.
  • Latitude and Longitude
  • Edges - A List of NodeEdgeRow objects, once again from the database.

A NodeEdgeRow object connects two NodeRow objects, Node1, Node2 and has a cost Weight

Basically speaking, the graph consists of Corridor Nodes and Non-Corridor nodes. A corridor node has a latitude and longitude, a non-corridor node does not. The weight of an edge connecting two corridor nodes is equal to the distance in meters between the two nodes' lat/lon points. The weight of an edge connecting a corridor to a non-corridor or two non-corridors is equal to a large integer, used to ensure that non-corridors are only ever part of a route when absolutely necessary. This large integer is usually 999,999 but may be slightly lower depending on the priority that this non-corridor should take over others.

Therefore as far as I can see using a small constant as a heuristic cost when the distance cannot be calculated is admissible because the actual cost will always be much higher. It may be that a different constant value gives better routes but I don't know enough to work that out. Either way, I can't think of a different way of providing an admissible heuristic without knowing the distance when everything else is based on distance, at least not without significant performance impacts.

@VisualMelon
Copy link
Author

VisualMelon commented Sep 6, 2018

If I'm understanding correctly, a non-Corridor node will have edges with (very) high weights? Even so, that still means the heurisitic can be an underestimate of itself (not-monotone) which is a problem. Simply using the same distance metric for all nodes would resolve this issue (assuming it is meaningful in your domain). Is your concern that computing the heuristic is expensive? (missed the bit about non-corridor nodes not having a lat/long)

(Note: if your heurisitc is not admissible, then A* won't necessarily find the optimal route, but it can still give you a good estimate (if it's close enough), so maybe ignoring this is fine for your usecase)

@JackSteel97
Copy link

Thanks for your help! It looks like I've got A* working from your code above, and from a couple of test runs it's about 100x faster than the previous algorithm, which is nice.

I'll do some testing comparing the generated routes against routes generated by the previous Dijkstra's implementation to see how good the routes are in terms of optimality.

IIRC the lowest value of the very high weighted edges is 88888 and the highest is 999999, would setting the heuristic to somewhere in the range when a lat/lon distance cannot be calculated be better than using 0?

Do you know of any decent in detail explanations of A* with Priority Queues? Would be nice to understand what I've just written a little more than I do currently. I'm pretty sure I understand the general concept of Dijkstra's and A* but some of what is being done in your code I have no idea why it is like that.

@VisualMelon
Copy link
Author

I couldn't recommend any reading in particular... I've just sort of being writing A* for years (I pretty much just throw heuristic search at everything and hope it's 'good enough'). I'm not an expert on the subject, but the real complexity is in the priority queue implementation.

Which bits don't make sense? There is a reasonable chance I'm just doing something daft.

@VisualMelon
Copy link
Author

VisualMelon commented Sep 6, 2018

(Just spotted a blatant error: missing queueTable.Add(startNode.NodeId, starter); - probably a non-issue, but not good)

@JackSteel97
Copy link

This should convert to Dijkstra's by taking out the heuristic stuff, right?

@VisualMelon
Copy link
Author

Indeed, if you replace the heuristic function with return 0 then it is (should be) equivalent to a Dijkstra's.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment