Last active
December 28, 2015 00:18
-
-
Save cwoac/7412148 to your computer and use it in GitHub Desktop.
assignment3.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// C for C programmers | |
// Assignment 3 | |
// compiles and runs under g++ 4.8.1 with -std=c++11 also | |
// with strict language and error checking turned on | |
// (-ansi -std=c++11 -Wall -Wextra -Werror -Wpedantic) | |
// | |
// Note that I do NOT use 'using namespace std' - this is considered | |
// bad practise as it leads to namespace pollution - see, for instance, | |
// the style guide linked to in the course: | |
// http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces | |
// | |
// Also, in real world this would very much be done as multiple files, | |
// properly split out; this is condensed into a single file to aid reviewing. | |
// | |
// This program will look for a file called 'input.txt' and attempt to load that. | |
#include <iostream> | |
#include <fstream> | |
#include <vector> | |
#include <tuple> | |
#include <map> | |
#include <cstdlib> | |
#include <utility> // for pair | |
#include <algorithm> // for the various heap functions | |
// short helper function to get a random number in the range [lower..upper] | |
// In practise this will not give perfectly uniform distribution due to the inaccuracies | |
// of double precision, espectially v.close to the edges. However it is good enough | |
// for here. | |
inline double rand( double lower, double upper ) | |
{ | |
return lower + (std::rand() * (upper-lower) / RAND_MAX); | |
} | |
// compare_nodes helper functions needed for the various *_heap functions | |
// These functions want the largest value on the top of the heap, but we want | |
// the smallest distance of the pair instead. | |
struct compare_nodes | |
{ | |
// compare a pair of (node,cost) pairs | |
bool operator()( const std::pair<unsigned int,double> &left, | |
const std::pair<unsigned int,double> &right ) | |
{ | |
return left.second > right.second; | |
} | |
// compare a pair of (node,node,cost) tuples | |
bool operator()( const std::tuple<unsigned int,unsigned int,double> &left, | |
const std::tuple<unsigned int,unsigned int,double> &right ) | |
{ | |
return std::get<2>(left) > std::get<2>(right); | |
} | |
}; | |
//////////////////////////////////////////////////////////////////////////////// | |
// The edgelist is a list of all nodes that can be reached from a single node | |
// and it's distances | |
class EdgeList | |
{ | |
public: | |
// create a new, empty edgelist | |
EdgeList( unsigned int index=0 ) : index(index) {} | |
// getter for index (read-only, so no setter) | |
unsigned int get_index( void ) | |
{ | |
return index; | |
} | |
// adds an edge to the list | |
void add_edge( unsigned int index, double distance ) | |
{ | |
// need to check whether this route already exists or not. | |
if( data.find(index) != data.end() ) | |
{ | |
// it does. Is this route better? | |
double current = data[index]; | |
if ( distance < current ) | |
{ | |
// yes. Store it. | |
data[index] = distance; | |
} | |
} | |
else | |
{ | |
data[index] = distance; | |
} | |
} | |
// expose the data iterator for the operator overload (begin) | |
std::map<unsigned int,double>::iterator begin( void ) | |
{ | |
return data.begin(); | |
} | |
// expose the data iterator for the operator overload (end) | |
std::map<unsigned int,double>::iterator end( void ) | |
{ | |
return data.end(); | |
} | |
// return the total length of edges in the list | |
double total_edge_distance( void ) | |
{ | |
double result=0.0; | |
for( std::map<unsigned int,double>::iterator data_iter=data.begin(); | |
data_iter!=data.end(); | |
++data_iter ) | |
{ | |
result += data_iter->second; | |
} | |
return result; | |
} | |
private: | |
// data is stored as a map from node index (unsigned int) to distance (double) | |
std::map<unsigned int,double> data; | |
// the source node index for this edgelist | |
unsigned int index; | |
}; | |
// ostream override for pretty printing of EdgeList | |
std::ostream &operator<<( std::ostream &out, EdgeList &edge_list ) | |
{ | |
out << edge_list.get_index() << ": "; | |
for( std::map<unsigned int,double>::iterator data_iter=edge_list.begin(); | |
data_iter != edge_list.end(); | |
++data_iter ) | |
{ | |
//first is the index, second the distance | |
out << data_iter->first << " (" << data_iter->second << ") "; | |
} | |
out << std::endl; | |
return out; | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
// A Graph is a set of nodes and weighted, undirected edges between them | |
class Graph | |
{ | |
public: | |
Graph( unsigned int node_count, double edge_density, double minimum_distance=1.0, double maximum_distance=10.0) : node_count(node_count) | |
{ | |
// the chosen distance between two nodes. | |
// Declared here to prevent needless reallocation. | |
double distance; | |
// rand will return RAND_MAX+1 possible ints, but we will want to create | |
// an edge for only edge_density of these. | |
// Compute this now to save on repeated calculations. | |
// Note that we use a long here in case an edge density of >1.0 is used | |
const long edge_chance = static_cast<long> (RAND_MAX * edge_density); | |
// initialise the vector of EdgeLists | |
for( unsigned int i=0; i<node_count; ++i ) | |
{ | |
data.push_back( EdgeList(i) ); | |
} | |
// iterate over each (node,node) pair in turn, and consider making an edge between them. | |
for( unsigned int i=0; i<node_count; ++i ) | |
{ | |
// note that in Dijkstra's algorithm, reflexive (node to itself) links | |
// are effectively ignored as it will always be quicker to not take | |
// that edge. We can safely ignore any such (nonsensical) links. | |
// Also, since we are looking at undirected graphs, we only need to | |
// consider the triangular matrix - hence starting j from i+1 and | |
// assigning the edge j->i as well as i->j | |
for( unsigned int j=i+1; j<node_count; ++j ) | |
{ | |
if( std::rand() <= edge_chance ) | |
{ | |
distance = rand( minimum_distance,maximum_distance ); | |
data[i].add_edge( j,distance ); | |
data[j].add_edge( i,distance ); | |
} | |
} | |
} | |
} | |
// Constructor to load from a file | |
// File format is single number on first line (to indicated node count) | |
// then one line per edge: to, from, cost; nodes indexed from 0 | |
Graph( std::string filename ) | |
{ | |
// attempt to open the file for reading. | |
std::ifstream in_file; | |
in_file.open( filename.c_str() ); | |
if( in_file.fail() ) | |
{ | |
// handle error case | |
std::cout << "Unable to read file." << std::endl; | |
return; | |
} | |
// read the node_count | |
in_file >> node_count; | |
// initialise the vector of EdgeLists | |
for( unsigned int i=0; i<node_count; ++i ) | |
{ | |
data.push_back( EdgeList(i) ); | |
} | |
// now read the edges in. | |
unsigned int i,j; | |
double cost; | |
// keep going as long as we are able to read the triplets | |
while( in_file >> i >> j >> cost ) | |
{ | |
// sanity check the values. | |
if( i>=node_count || j>=node_count ) | |
{ | |
std::cout << "Invalid value in graph - node outside range ("<<i<<","<<j<<","<<cost<<")"<<std::endl; | |
return; | |
} | |
// create the edge | |
data[i].add_edge( j,cost ); | |
} | |
in_file.close(); | |
} | |
// Constructor to create a blank graph. | |
// Most useful for MST construction | |
Graph( unsigned int node_count=0 ) : node_count(node_count) | |
{ | |
// initialise the vector of EdgeLists | |
for( unsigned int i=0; i<node_count; ++i ) | |
{ | |
data.push_back( EdgeList(i) ); | |
} | |
} | |
// calculate_shortest_path | |
// Given a graph and two nodes within the graph, finds the shortest path | |
// between thema via Dijkstra's algorithm | |
double calculate_shortest_path( unsigned int from, unsigned int to) | |
{ | |
// sanity check provided values | |
if( from==to || from >= node_count || to >= node_count ) | |
{ | |
return 0.0; | |
} | |
// create a map of places we *know* the shortest distance to. | |
// implicitly, we have visited ourself with a distance of 0 | |
std::map<unsigned int,double> visited; | |
visited[from] = 0.0; | |
// now create a vector of (index,distance pairings) of potential next | |
// points and what it would cost to get there from the visited set. | |
// Seed this with all visitable nodes from the start point. | |
std::vector< std::pair<unsigned int,double> > next; | |
for( std::map<unsigned int,double>::iterator next_iter=data[from].begin(); | |
next_iter!=data[from].end(); | |
++next_iter) | |
{ | |
next.push_back( std::make_pair(next_iter->first,next_iter->second) ); | |
} | |
//sort it so that the smallest item is on the top | |
std::make_heap( next.begin(), next.end(), compare_nodes() ); | |
// now we are all set up, call the inner recursive function to do the work. | |
return calculate_shortest_path_recurse(to,visited,next); | |
} | |
// total_edge_distance | |
// returns the total length of all edges in the graph. | |
double total_edge_distance( void ) | |
{ | |
double result=0.0; | |
for( std::vector<EdgeList>::iterator data_iter=data.begin(); | |
data_iter != data.end(); | |
++data_iter ) | |
{ | |
result += data_iter->total_edge_distance(); | |
} | |
return result; | |
} | |
// calculate_spanning_tree | |
// Computes a minimal spanning tree of the graph using Prim's algorithm | |
void calculate_spanning_tree( Graph &G ) | |
{ | |
// create a list of nodes we have added to the tree. | |
// we need to have a single arbitary node to act as the root of the tree. | |
// For this we pick node 0 | |
const unsigned int start_node = 0; | |
std::vector< unsigned int > nodes; | |
nodes.push_back( start_node ); | |
// initialise the MST. | |
G = Graph( node_count ); | |
// now create a vector of (node,node,distance) tuples of potential next | |
// points and what it would cost to get there from the visited set. | |
// Seed this with all visitable nodes from the start point. | |
std::vector< std::tuple<unsigned int,unsigned int,double> > edges; | |
for( std::map<unsigned int,double>::iterator edges_iter = data[start_node].begin(); | |
edges_iter!=data[start_node].end(); | |
++edges_iter ) | |
{ | |
// the resultant tree will never have reflexive loops, so drop any now | |
if( edges_iter->first == start_node ) | |
{ | |
continue; | |
} | |
edges.push_back(std::make_tuple(start_node,edges_iter->first,edges_iter->second)); | |
} | |
//sort it so that the smallest item is on the top | |
std::make_heap( edges.begin(), edges.end(), compare_nodes() ); | |
// declare variables we will use inside the while loop | |
// the next selected edge to be added. | |
std::tuple< unsigned int,unsigned int,double > edge; | |
// the next selected node to be added | |
unsigned int node; | |
// have we already seen a link to a given node? | |
std::vector< std::tuple<unsigned int,unsigned int,double> >::iterator existing_edge; | |
// now we are all set up, start the main loop | |
// note the second exit condition is in case of disjoint graphs | |
while( nodes.size() < node_count && edges.size()>0) | |
{ | |
//grab the shortest edge | |
edge = edges.front(); | |
// need to refer to the new node repeatedly, so grab it out. | |
node = std::get<1>(edge); | |
// heaps are weird in stl. need to use pop_heap to move the front | |
// to the back then pop that. | |
std::pop_heap( edges.begin(),edges.end(),compare_nodes() ); | |
edges.pop_back(); | |
// add this edge to the graph. | |
G.data[std::get<0>(edge)].add_edge( node,std::get<2>(edge) ); | |
// note that we have now visited that node | |
nodes.push_back(node); | |
// if we have reached the exit condition, we don't need to consider the | |
// next edges | |
if( nodes.size()==node_count ) | |
{ | |
break; | |
} | |
// add in any edges from the new node to as-yet unselected nodes | |
for( std::map<unsigned int,double>::iterator new_iter=data[node].begin(); | |
new_iter!=data[node].end(); | |
++new_iter ) | |
{ | |
// check that the destination node is not already visited | |
if( std::find(nodes.begin(),nodes.end(),new_iter->first) != nodes.end() ) | |
{ | |
// don't bother with this edge | |
continue; | |
} | |
// need to only maintain the shortest link to a given potential target node. | |
bool already_found = false; | |
for( std::vector< std::tuple<unsigned int,unsigned int,double> >::iterator edge_iter=edges.begin(); | |
edge_iter!=edges.end(); | |
++edge_iter ) | |
{ | |
if( std::get<1>(*edge_iter) == new_iter->first ) | |
{ | |
// we already have a potential link to this node | |
already_found = true; | |
// check to see if this one is better | |
if( std::get<2>(*edge_iter) > new_iter->second ) | |
{ | |
// it is. switch to the more promising link | |
*edge_iter=std::make_tuple( node, new_iter->first, new_iter->second ); | |
// need to rework the heap, potentially | |
std::make_heap( edges.begin(),edges.end(),compare_nodes() ); | |
// all done for this one, break out | |
break; | |
} | |
} | |
} | |
// have we handled this edge now? | |
if( already_found==true ) | |
{ | |
continue; | |
} | |
// add this edge as a potential next step. | |
edges.push_back( std::make_tuple(node,new_iter->first,new_iter->second) ); | |
// resort the heap for the recurse | |
std::push_heap( edges.begin(),edges.end(),compare_nodes() ); | |
} | |
} | |
} | |
// calculate_average_distance calculates the average path length in the graph | |
// from node 0 to all other nodes. | |
double calculate_average_distance( void ) | |
{ | |
// the total distance along the paths and the distance between two specific nodes | |
double total_distance=0.0, distance=0.0; | |
// the number of nodes linked in the graph | |
unsigned int number_linked=0; | |
// calculate the average distance between node 0 and all other nodes | |
for( unsigned int i=1; i<node_count; ++i ) | |
{ | |
distance = calculate_shortest_path(0,i); | |
// check that we have a path | |
if( distance > 0.0 ) | |
{ | |
// build up the running total of distances and link count | |
total_distance += distance; | |
number_linked++; | |
} | |
} | |
return total_distance/number_linked; | |
} | |
// expose the data iterator for the operator overload (begin) | |
std::vector<EdgeList>::iterator begin( void ) | |
{ | |
return data.begin(); | |
} | |
// expose the data iterator for the operator overload (end) | |
std::vector<EdgeList>::iterator end( void ) | |
{ | |
return data.end(); | |
} | |
private: | |
// calculate_shortest_path_recurse | |
// The recursive loop for Dijkstra's algorithm. | |
// Picks a single node from next, | |
// adds it to visited, | |
// checks for completion, | |
// augments next with any new items | |
// recurses onwards. | |
double calculate_shortest_path_recurse( unsigned int to, | |
std::map<unsigned int,double> &visited, | |
std::vector< std::pair<unsigned int,double> > &next ) | |
{ | |
// check we haven't run out of places | |
if( next.size()==0 ) | |
{ | |
return 0.0; | |
} | |
// take the next node off the list. | |
std::pair<unsigned int,double> node=next.front(); | |
// heaps are weird in stl. need to use pop_heap to move the front | |
// to the back then pop that. | |
std::pop_heap( next.begin(),next.end(),compare_nodes() ); | |
next.pop_back(); | |
// add it to the visited list | |
visited[node.first] = node.second; | |
// check for exit condition. | |
if( node.first==to ) | |
{ | |
return node.second; | |
} | |
//nope, so consider list of potential new next nodes | |
for( std::map<unsigned int,double>::iterator edge_iter=data[node.first].begin(); | |
edge_iter!=data[node.first].end(); | |
++edge_iter ) | |
{ | |
// first check that the node is not already visited | |
if( visited.find(edge_iter->first) != visited.end() ) | |
{ | |
// don't bother with this one, we already have a best path | |
continue; | |
} | |
// is it already on the next list? | |
bool already_found = false; | |
for( std::vector< std::pair<unsigned int,double> >::iterator next_iter=next.begin(); | |
next_iter!=next.end(); | |
++next_iter ) | |
{ | |
if( next_iter->first==edge_iter->first ) | |
{ | |
// it is! But is this a better route? | |
if( edge_iter->second + node.second < next_iter->second ) | |
{ | |
// yes! note the new distance | |
next_iter->second = edge_iter->second + node.second; | |
already_found = true; | |
// need to rework the heap, potentially | |
std::make_heap( next.begin(),next.end(),compare_nodes() ); | |
// all done for this one, break out | |
break; | |
} | |
} | |
} | |
// did we parse this node? | |
if( already_found==true ) | |
{ | |
continue; | |
} | |
// it has yet to appear in next, so add it | |
// note that we need to add node.second to reflect the true cost of getting there. | |
next.push_back( std::make_pair(edge_iter->first,node.second+edge_iter->second) ); | |
std::push_heap( next.begin(),next.end(),compare_nodes() ); | |
} | |
// now we have set up for the next iteration, recurse in. | |
return calculate_shortest_path_recurse( to,visited,next ); | |
} | |
// data is a vector of edgelists. | |
// the Nth vector in data is a map of all the nodes reachable from node N and their distance-costs. | |
std::vector< EdgeList > data; | |
unsigned int node_count; | |
}; | |
// ostream override for pretty printing of Graph | |
std::ostream &operator<<( std::ostream &out, Graph &graph ) | |
{ | |
for( std::vector<EdgeList>::iterator data_iter=graph.begin(); | |
data_iter != graph.end(); | |
++data_iter ) | |
{ | |
out << *data_iter; | |
} | |
return out; | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
// entry point to the program | |
int main( void ) | |
{ | |
// load in the data file. | |
Graph G = Graph( "input.txt" ); | |
// Create the spanning tree | |
Graph MST; | |
G.calculate_spanning_tree(MST); | |
// print out the spanning tree and its costs | |
std::cout << MST << std::endl; | |
std::cout << "Total distance of tree: " << MST.total_edge_distance() << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment