cwoac/assignment2.cpp

## assignment2.cpp
// C for C programmers
// Assignment 2
// compiles and runs under g++ 4.8.1 with no options and also
// with strict language and error checking turned on
// (-ansi -std=c++98 -Wall -Wextra -Werror -Wpedantic)
//
// Note that I do NOT use 'using namespace std' - this is considered
// bad practise as it leads to namespace pollution - see, for instance,
// the style guide linked to in the course:
// http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces
//
// Also, in real world this would very much be done as multiple files,
// properly split out; the instructions do say a single file, so here it is


#include <iostream>
#include <vector>
#include <map>
#include <cstdlib>
#include <utility>   // for pair
#include <algorithm> // for the various heap functions

// short helper function to get a random number in the range [lower..upper]
// In practise this will not give perfectly uniform distribution due to the inaccuracies
// of double precision, espectially v.close to the edges. However it is good enough
// for here.
inline double rand( double lower, double upper )
{
  return lower + (std::rand() * (upper-lower) / RAND_MAX);
}

// compare_nodes helper function needed for the various *_heap functions
// These functions want the largest value on the top of the heap, but we want
// the smallest distance of the pair instead.

struct compare_nodes
{
  bool operator()( const std::pair<unsigned int,double> &left,
                   const std::pair<unsigned int,double> &right )
  {
    return left.second > right.second;
  }
};


////////////////////////////////////////////////////////////////////////////////
// The edgelist is a list of all nodes that can be reached from a single node
// and it's distances
class EdgeList
{
public:
  // create a new, empty edgelist
  EdgeList( unsigned int index=0 )
  {
    this->index = index;
  }

  // getter for index (read-only, so no setter)
  unsigned int get_index( void )
  {
    return index;
  }

  // adds an edge to the list
  void add_edge( unsigned int index, double distance )
  {
    // need to check whether this route already exists or not.
    if( data.find(index) != data.end()  )
    {
      // it does. Is this route better?
      double current = data[index];
      if ( distance < current )
      {
        // yes. Store it.
        data[index] = distance;
      }
    }
    else
    {
      data[index] = distance;
    }
  }

  // expose the data iterator for the operator overload (begin)
  std::map<unsigned int,double>::iterator begin( void )
  {
    return data.begin();
  }

  // expose the data iterator for the operator overload (end)
  std::map<unsigned int,double>::iterator end( void )
  {
    return data.end();
  }

private:
  // data is stored as a map from node index (unsigned int) to distance (double)
  std::map<unsigned int,double> data;
  // the source node index for this edgelist
  unsigned int index;
};

// ostream override for pretty printing of EdgeList
std::ostream &operator<<( std::ostream &out, EdgeList &edge_list )
{
  out << edge_list.get_index() << ": ";
  for( std::map<unsigned int,double>::iterator data_iter=edge_list.begin();
       data_iter != edge_list.end();
       ++data_iter )
  {
    //first is the index, second the distance
    out << data_iter->first << " (" << data_iter->second << ") ";
  }

  out << std::endl;
  return out;
}


////////////////////////////////////////////////////////////////////////////////
// A Graph is a set of nodes and weighted, undirected edges between them
class Graph
{
public:
  Graph( unsigned int node_count, double edge_density, double minimum_distance=1.0, double maximum_distance=10.0)
  {
    // the chosen distance between two nodes.
    // Declared here to prevent needless reallocation.
    double distance;

    this->node_count = node_count;

    // rand will return RAND_MAX+1 possible ints, but we will want to create
    // an edge for only edge_density of these.
    // Compute this now to save on repeated calculations.
    // Note that we use a long here in case an edge density of >1.0 is used
    const long edge_chance = static_cast<long> (RAND_MAX * edge_density);

    // initialise the vector of EdgeLists
    for( unsigned int i=0; i<node_count; ++i )
    {
      data.push_back( EdgeList(i) );
    }

    // iterate over each (node,node) pair in turn, and consider making an edge between them.
    for( unsigned int i=0; i<node_count; ++i )
    {
      // note that in Dijkstra's algorithm, reflexive (node to itself) links
      // are effectively ignored as it will always be quicker to not take
      // that edge. We can safely ignore any such (nonsensical) links.
      // Also, since we are looking at undirected graphs, we only need to
      // consider the triangular matrix - hence starting j from i+1 and
      // assigning the edge j->i as well as i->j
      for( unsigned int j=i+1; j<node_count; ++j )
      {
        if( std::rand() <= edge_chance )
        {
          distance = rand( minimum_distance,maximum_distance );
          data[i].add_edge( j,distance );
          data[j].add_edge( i,distance );
        }
      }
    }
  }


  // calculate_shortest_path
  // Given a graph and two nodes within the graph, finds the shortest path
  // between thema via Dijkstra's algorithm
  double calculate_shortest_path( unsigned int from, unsigned int to)
  {
    // sanity check provided values
    if( from==to || from >= node_count || to >= node_count )
    {
      return 0.0;
    }

    // create a map of places we *know* the shortest distance to.
    // implicitly, we have visited ourself with a distance of 0
    std::map<unsigned int,double> visited;
    visited[from] = 0.0;

    // now create a vector of (index,distance pairings) of potential next
    // points and what it would cost to get there from the visited set.
    // Seed this with all visitable nodes from the start point.
    std::vector< std::pair<unsigned int,double> > next;
    for( std::map<unsigned int,double>::iterator next_iter=data[from].begin();
         next_iter!=data[from].end();
         ++next_iter)
    {
      next.push_back( std::make_pair(next_iter->first,next_iter->second) );
    }

    //sort it so that the smallest item is on the top
    std::make_heap( next.begin(), next.end(), compare_nodes() );

    // now we are all set up, call the inner recursive function to do the work.
    return calculate_shortest_path_recurse(to,visited,next);
  }
  // calculate_average_distance calculates the average path length in the graph
  // from node 0 to all other nodes.
  double calculate_average_distance( void )
  {
    // the total distance along the paths and the distance between two specific nodes
    double total_distance=0.0, distance=0.0;
    // the number of nodes linked in the graph
    unsigned int number_linked=0;

    // calculate the average distance between node 0 and all other nodes
    for( unsigned int i=1; i<node_count; ++i )
    {
      distance = calculate_shortest_path(0,i);
      // check that we have a path
      if( distance > 0.0 )
      {
        // build up the running total of distances and link count
        total_distance += distance;
        number_linked++;
      }
    }

    return total_distance/number_linked;
  }

  // expose the data iterator for the operator overload (begin)
  std::vector<EdgeList>::iterator begin( void )
  {
    return data.begin();
  }

  // expose the data iterator for the operator overload (end)
  std::vector<EdgeList>::iterator end( void )
  {
    return data.end();
  }

private:

  // calculate_shortest_path_recurse
  // The recursive loop for Dijkstra's algorithm.
  // Picks a single node from next,
  // adds it to visited,
  // checks for completion,
  // augments next with any new items
  // recurses onwards.
  double calculate_shortest_path_recurse( unsigned int to,
    std::map<unsigned int,double> &visited,
    std::vector< std::pair<unsigned int,double> > &next )
  {
    // check we haven't run out of places
    if( next.size()==0 )
    {
      return 0.0;
    }

    // take the next node off the list.
    std::pair<unsigned int,double> node=next.front();
    // heaps are weird in stl. need to use pop_heap to move the front
    // to the back then pop that.
    std::pop_heap( next.begin(),next.end(),compare_nodes() );
    next.pop_back();
    // add it to the visited list
    visited[node.first] = node.second;

    // check for exit condition.
    if( node.first==to )
    {
      return node.second;
    }

    //nope, so consider list of potential new next nodes
    for( std::map<unsigned int,double>::iterator edge_iter=data[node.first].begin();
         edge_iter!=data[node.first].end();
         ++edge_iter )
    {
      // first check that the node is not already visited
      if( visited.find(edge_iter->first) != visited.end() )
      {
        // don't bother with this one, we already have a best path
        continue;
      }
      // is it already on the next list?
      bool already_found = false;
      for( std::vector< std::pair<unsigned int,double> >::iterator next_iter=next.begin();
           next_iter!=next.end();
           ++next_iter )
      {
        if( next_iter->first==edge_iter->first )
        {
          // it is! But is this a better route?
          if( edge_iter->second + node.second < next_iter->second )
          {
            // yes! note the new distance
            next_iter->second = edge_iter->second + node.second;
            already_found = true;
            // need to rework the heap, potentially
            std::make_heap( next.begin(),next.end(),compare_nodes() );
            // all done for this one, break out
            break;
          }
        }
      }
      // did we parse this node?
      if( already_found==true )
      {
        continue;
      }
      // it has yet to appear in next, so add it
      // note that we need to add node.second to reflect the true cost of getting there.
      next.push_back( std::make_pair(edge_iter->first,node.second+edge_iter->second) );
      std::push_heap( next.begin(),next.end(),compare_nodes() );
    }

    // now we have set up for the next iteration, recurse in.
    return calculate_shortest_path_recurse( to,visited,next );
  }

  // data is a vector of edgelists.
  // the Nth vector in data is a map of all the nodes reachable from node N and their distance-costs.
  std::vector< EdgeList > data;

  unsigned int node_count;
};

// ostream override for pretty printing of Graph
std::ostream &operator<<( std::ostream &out, Graph &graph )
{
  for( std::vector<EdgeList>::iterator data_iter=graph.begin();
       data_iter != graph.end();
       ++data_iter )
  {
    out << *data_iter;
  }

  return out;
}

////////////////////////////////////////////////////////////////////////////////
// entry point to the program
int main( void )
{

  // initialise random number generator with something random enough.
   std::srand( time(NULL) );
  // create a graph of 50 NODES, 20% connectivity, distances from 1.0 to 10.0
  Graph G = Graph( 50,0.2 );
  std::cout << "Average distance for 20% connectivity: " << G.calculate_average_distance() << std::endl;
  // Now make one with 40%
  G = Graph( 50,0.4 );
  std::cout << "Average distance for 40% connectivity: " << G.calculate_average_distance() << std::endl;

  return 0;
}
	// C for C programmers
	// Assignment 2
	// compiles and runs under g++ 4.8.1 with no options and also
	// with strict language and error checking turned on
	// (-ansi -std=c++98 -Wall -Wextra -Werror -Wpedantic)
	//
	// Note that I do NOT use 'using namespace std' - this is considered
	// bad practise as it leads to namespace pollution - see, for instance,
	// the style guide linked to in the course:
	// http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces
	//
	// Also, in real world this would very much be done as multiple files,
	// properly split out; the instructions do say a single file, so here it is


	#include <iostream>
	#include <vector>
	#include <map>
	#include <cstdlib>
	#include <utility> // for pair
	#include <algorithm> // for the various heap functions

	// short helper function to get a random number in the range [lower..upper]
	// In practise this will not give perfectly uniform distribution due to the inaccuracies
	// of double precision, espectially v.close to the edges. However it is good enough
	// for here.
	inline double rand( double lower, double upper )
	{
	return lower + (std::rand() * (upper-lower) / RAND_MAX);
	}

	// compare_nodes helper function needed for the various *_heap functions
	// These functions want the largest value on the top of the heap, but we want
	// the smallest distance of the pair instead.

	struct compare_nodes
	{
	bool operator()( const std::pair<unsigned int,double> &left,
	const std::pair<unsigned int,double> &right )
	{
	return left.second > right.second;
	}
	};


	////////////////////////////////////////////////////////////////////////////////
	// The edgelist is a list of all nodes that can be reached from a single node
	// and it's distances
	class EdgeList
	{
	public:
	// create a new, empty edgelist
	EdgeList( unsigned int index=0 )
	{
	this->index = index;
	}

	// getter for index (read-only, so no setter)
	unsigned int get_index( void )
	{
	return index;
	}

	// adds an edge to the list
	void add_edge( unsigned int index, double distance )
	{
	// need to check whether this route already exists or not.
	if( data.find(index) != data.end() )
	{
	// it does. Is this route better?
	double current = data[index];
	if ( distance < current )
	{
	// yes. Store it.
	data[index] = distance;
	}
	}
	else
	{
	data[index] = distance;
	}
	}

	// expose the data iterator for the operator overload (begin)
	std::map<unsigned int,double>::iterator begin( void )
	{
	return data.begin();
	}

	// expose the data iterator for the operator overload (end)
	std::map<unsigned int,double>::iterator end( void )
	{
	return data.end();
	}

	private:
	// data is stored as a map from node index (unsigned int) to distance (double)
	std::map<unsigned int,double> data;
	// the source node index for this edgelist
	unsigned int index;
	};

	// ostream override for pretty printing of EdgeList
	std::ostream &operator<<( std::ostream &out, EdgeList &edge_list )
	{
	out << edge_list.get_index() << ": ";
	for( std::map<unsigned int,double>::iterator data_iter=edge_list.begin();
	data_iter != edge_list.end();
	++data_iter )
	{
	//first is the index, second the distance
	out << data_iter->first << " (" << data_iter->second << ") ";
	}

	out << std::endl;
	return out;
	}


	////////////////////////////////////////////////////////////////////////////////
	// A Graph is a set of nodes and weighted, undirected edges between them
	class Graph
	{
	public:
	Graph( unsigned int node_count, double edge_density, double minimum_distance=1.0, double maximum_distance=10.0)
	{
	// the chosen distance between two nodes.
	// Declared here to prevent needless reallocation.
	double distance;

	this->node_count = node_count;

	// rand will return RAND_MAX+1 possible ints, but we will want to create
	// an edge for only edge_density of these.
	// Compute this now to save on repeated calculations.
	// Note that we use a long here in case an edge density of >1.0 is used
	const long edge_chance = static_cast<long> (RAND_MAX * edge_density);

	// initialise the vector of EdgeLists
	for( unsigned int i=0; i<node_count; ++i )
	{
	data.push_back( EdgeList(i) );
	}

	// iterate over each (node,node) pair in turn, and consider making an edge between them.
	for( unsigned int i=0; i<node_count; ++i )
	{
	// note that in Dijkstra's algorithm, reflexive (node to itself) links
	// are effectively ignored as it will always be quicker to not take
	// that edge. We can safely ignore any such (nonsensical) links.
	// Also, since we are looking at undirected graphs, we only need to
	// consider the triangular matrix - hence starting j from i+1 and
	// assigning the edge j->i as well as i->j
	for( unsigned int j=i+1; j<node_count; ++j )
	{
	if( std::rand() <= edge_chance )
	{
	distance = rand( minimum_distance,maximum_distance );
	data[i].add_edge( j,distance );
	data[j].add_edge( i,distance );
	}
	}
	}
	}


	// calculate_shortest_path
	// Given a graph and two nodes within the graph, finds the shortest path
	// between thema via Dijkstra's algorithm
	double calculate_shortest_path( unsigned int from, unsigned int to)
	{
	// sanity check provided values
	if( from==to \|\| from >= node_count \|\| to >= node_count )
	{
	return 0.0;
	}

	// create a map of places we know the shortest distance to.
	// implicitly, we have visited ourself with a distance of 0
	std::map<unsigned int,double> visited;
	visited[from] = 0.0;

	// now create a vector of (index,distance pairings) of potential next
	// points and what it would cost to get there from the visited set.
	// Seed this with all visitable nodes from the start point.
	std::vector< std::pair<unsigned int,double> > next;
	for( std::map<unsigned int,double>::iterator next_iter=data[from].begin();
	next_iter!=data[from].end();
	++next_iter)
	{
	next.push_back( std::make_pair(next_iter->first,next_iter->second) );
	}

	//sort it so that the smallest item is on the top
	std::make_heap( next.begin(), next.end(), compare_nodes() );

	// now we are all set up, call the inner recursive function to do the work.
	return calculate_shortest_path_recurse(to,visited,next);
	}
	// calculate_average_distance calculates the average path length in the graph
	// from node 0 to all other nodes.
	double calculate_average_distance( void )
	{
	// the total distance along the paths and the distance between two specific nodes
	double total_distance=0.0, distance=0.0;
	// the number of nodes linked in the graph
	unsigned int number_linked=0;

	// calculate the average distance between node 0 and all other nodes
	for( unsigned int i=1; i<node_count; ++i )
	{
	distance = calculate_shortest_path(0,i);
	// check that we have a path
	if( distance > 0.0 )
	{
	// build up the running total of distances and link count
	total_distance += distance;
	number_linked++;
	}
	}

	return total_distance/number_linked;
	}

	// expose the data iterator for the operator overload (begin)
	std::vector<EdgeList>::iterator begin( void )
	{
	return data.begin();
	}

	// expose the data iterator for the operator overload (end)
	std::vector<EdgeList>::iterator end( void )
	{
	return data.end();
	}

	private:

	// calculate_shortest_path_recurse
	// The recursive loop for Dijkstra's algorithm.
	// Picks a single node from next,
	// adds it to visited,
	// checks for completion,
	// augments next with any new items
	// recurses onwards.
	double calculate_shortest_path_recurse( unsigned int to,
	std::map<unsigned int,double> &visited,
	std::vector< std::pair<unsigned int,double> > &next )
	{
	// check we haven't run out of places
	if( next.size()==0 )
	{
	return 0.0;
	}

	// take the next node off the list.
	std::pair<unsigned int,double> node=next.front();
	// heaps are weird in stl. need to use pop_heap to move the front
	// to the back then pop that.
	std::pop_heap( next.begin(),next.end(),compare_nodes() );
	next.pop_back();
	// add it to the visited list
	visited[node.first] = node.second;

	// check for exit condition.
	if( node.first==to )
	{
	return node.second;
	}

	//nope, so consider list of potential new next nodes
	for( std::map<unsigned int,double>::iterator edge_iter=data[node.first].begin();
	edge_iter!=data[node.first].end();
	++edge_iter )
	{
	// first check that the node is not already visited
	if( visited.find(edge_iter->first) != visited.end() )
	{
	// don't bother with this one, we already have a best path
	continue;
	}
	// is it already on the next list?
	bool already_found = false;
	for( std::vector< std::pair<unsigned int,double> >::iterator next_iter=next.begin();
	next_iter!=next.end();
	++next_iter )
	{
	if( next_iter->first==edge_iter->first )
	{
	// it is! But is this a better route?
	if( edge_iter->second + node.second < next_iter->second )
	{
	// yes! note the new distance
	next_iter->second = edge_iter->second + node.second;
	already_found = true;
	// need to rework the heap, potentially
	std::make_heap( next.begin(),next.end(),compare_nodes() );
	// all done for this one, break out
	break;
	}
	}
	}
	// did we parse this node?
	if( already_found==true )
	{
	continue;
	}
	// it has yet to appear in next, so add it
	// note that we need to add node.second to reflect the true cost of getting there.
	next.push_back( std::make_pair(edge_iter->first,node.second+edge_iter->second) );
	std::push_heap( next.begin(),next.end(),compare_nodes() );
	}

	// now we have set up for the next iteration, recurse in.
	return calculate_shortest_path_recurse( to,visited,next );
	}

	// data is a vector of edgelists.
	// the Nth vector in data is a map of all the nodes reachable from node N and their distance-costs.
	std::vector< EdgeList > data;

	unsigned int node_count;
	};

	// ostream override for pretty printing of Graph
	std::ostream &operator<<( std::ostream &out, Graph &graph )
	{
	for( std::vector<EdgeList>::iterator data_iter=graph.begin();
	data_iter != graph.end();
	++data_iter )
	{
	out << *data_iter;
	}

	return out;
	}

	////////////////////////////////////////////////////////////////////////////////
	// entry point to the program
	int main( void )
	{

	// initialise random number generator with something random enough.
	std::srand( time(NULL) );
	// create a graph of 50 NODES, 20% connectivity, distances from 1.0 to 10.0
	Graph G = Graph( 50,0.2 );
	std::cout << "Average distance for 20% connectivity: " << G.calculate_average_distance() << std::endl;
	// Now make one with 40%
	G = Graph( 50,0.4 );
	std::cout << "Average distance for 40% connectivity: " << G.calculate_average_distance() << std::endl;

	return 0;
	}