gregmacfarlane/shp2xml.py

## shp2xml.py
# Code to read the FAF network
# Greg Macfarlane | Parsons Brinckerhoff


# ORNL provides the FAF network as a TransCAD network, and also as a shapefile
# that I think is probably exported from the TransCAD network. Kyle Ward in the
# Raleigh office tried to export an xml file from TransCAD, but it wasn't quite
# right. So I decided to write this script to make an xml MATSim network from
# the shapefile.

# The networkx library contains general graph functionality, including the
# ability to read a line shapefile as a graph. I tested its capability by
# exporting the network as a line shapefile of edges and a point shapefile of
# nodes, and was satisfied.
import networkx as nx

# To write the graph to xml, we use commands in the lxml library; we need to use
# this instead of the ElementTree library that comes standard with Python
# installations so that we can `pretty_print` the xml output.
import lxml.etree as ET


# When we read in G, the node labels are the coordinate tuples, e.g.
# (-111.458149, 38.108458). This isn't very useful, so we change them into
# integers beginning at 0. Also, MATSim likes to use coordinates in meters, so
# we use a version of the network that we exported into EPSG: 2818, a Lambert
# Conformal Conic projection centered on Montana. This projection gives
# coordinates in meters, and has the added LCC feature of minimizing distance
# distortion across a wide area of the map.
G = nx.read_shp("fafnetworkLCC.shp")

start = 0
G = nx.convert_node_labels_to_integers(G, first_label=start,
        label_attribute = "coord")


# MATSIM NETWORK BUILDER
# =============================================

# We need to build an element tree to store the node and edge attributes. We
# follow the [MATSim network specification](http://www.matsim.org/files/dtd/network_v1.dtd),
# as closely as possible; this calls for an xml structure
#
#  <network><nodes><node ...></nodes><links><link ....></links></network>.
#
# So in the element tree, the parent element is `network`, with two children,
# `links` and `nodes`. The number of individual `node` and `link` children is
# dependent on the size of the network.
# In general we follow the
# [MATSim network specification](http://www.matsim.org/files/dtd/network_v1.dtd)

network = ET.Element("network",
        attrib={'name':"MATSim network exported from FAF shapefile."})


# Node structure
# ----------------------------------------------
nodes = ET.SubElement(network, "nodes")

# Get node attributes by looping through all the nodes in graph. We can pull the
# id and coordinate attributes from the node attributes. The MATSim
# specification allows for a "type" attribute to distinguish between networks.
# I don't know that this is strictly necessary, but the example network uses "2"
# for all nodes, so I'll continue that convention to limit as many differences
# as possible.
#
#   <node id, x, y, type = "2")
for i in range(len(G)):
    ET.SubElement(nodes, "node",
            attrib={'id': str(G.nodes()[i]),
                    'x':str(G.node[i]['coord'][0]),
                    'y':str(G.node[i]['coord'][1]),
                    'type':"2"})


# Link structure
# -------------------------------------------
# There are a number of general link attributes that we need to set at the
# parent element. The specification says defaults are available, but we will set
# them anyways. The period over which link capacity is recorded `capperiod` is
# one hour, the effective length of vehicles `effectivecellcize` is 7.5 meters
# (we might want to change this, because we're dealing with trucks?), and the
# effective lane width is 3.75 meters
links = ET.SubElement(network, "links",
        attrib={'capperiod': "01:00:00",
                'effectivecellsize': "7.5",
                'effectivelanewidth': "3.75"})


# The MATSim network specification has a number of required fields and some that
# are not required. The required links are the link ID, the "from" node ID, the
# "to" node ID, and the distance, or the "length". The FAF network gives the
# distance in miles, but the network wants meters, so we need to apply a
# conversion factor.

# The `capacity` attribute is not listed as mandatory, but we needed to set the
# capperiod, so I'm worried that this is actually necessary. For now we'll set
# a constant value at 2000 veh/hrln * 3 lanes = 6000 veh/hr. We might be able to
# set this more precisely using the `SIGNT1` network attribute that determines
# whether a road is an interstate, state highway, etc.

# Other optional attributes include free flow speed, permanent lanes, whether
# the link is one-way (see discussion of reversed links below), the modes,
# permitted on the link (I don't know how this is supposed to tie into the node
# type above). We also append an additional attribute GEOID, constructed from
# the state and county FIPS codes. This may prove useful.

# To simplify the code, we want to extract the edge attributes into a form where
# we can get them as variable[(start,end)]
length = nx.get_edge_attributes(G, "MILES")
idvar  = nx.get_edge_attributes(G, "ID")
ctfips = nx.get_edge_attributes(G, "CTFIPS")
stfips = nx.get_edge_attributes(G, "STFIPS")
fclass = nx.get_edge_attributes(G, "SIGNT1")

for i in range(len(G.edges())):
    startnode = G.edges()[i][0]
    endnode = G.edges()[i][1]
    # create link SubElement with attributes pulled from the lists you got above.
    ET.SubElement(links, "link", attrib={
        'id': str(idvar[(startnode, endnode)]),
        'from': str(startnode),
        'to':   str(endnode),
        'capacity': str(6000),
        'modes': "car",
        'oneway': str(1),
        'type': str(10),
        'length': str(length[(startnode, endnode)] * 1609.34), # convert to meters
        'GEOID': str(stfips[(startnode, endnode)]).zfill(2) + str(ctfips[(startnode, endnode)]).zfill(3)})


# One extra item I noted in the MATSim documentation is that all links
# **must be** one-way. So we need to create all of the same links again, but
# with start and end nodes swapped. Also, we'll have to make a new link id.
# Because there are about ~180,000 links in the network if we add 1e6 to each of
# the IDs, we won't duplicate any labels. Also, link `2183` will have the
# ID `1002183`, for its reverse partner, which might be nice.
for i in range(len(G.edges())):
    startnode = G.edges()[i][0]
    endnode = G.edges()[i][1]
    ET.SubElement(links, "link", attrib={
        'id': str(int(idvar[(startnode, endnode)] + 1e6)), #create unique id as above
        'from': str(endnode), #swap to and from node ids
        'to':   str(startnode),
        'capacity': str(6000),
        'modes': "car",
        'oneway': str(1),
        'type': str(10),
        'length': str(length[(startnode, endnode)] * 1609.34), # convert to meters
        'GEOID': str(stfips[(startnode, endnode)]).zfill(2) + str(ctfips[(startnode, endnode)]).zfill(3)})


# We can now wrap the entire element tree and write it to an xml file with the
# same headers that came in the original file.
tree = ET.ElementTree(network)

with open('network.xml', 'w') as f:
    f.write("""<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE network SYSTEM "http://www.matsim.org/files/dtd/network_v1.dtd">
""")
    tree.write(f, pretty_print = True)
	# Code to read the FAF network
	# Greg Macfarlane \| Parsons Brinckerhoff


	# ORNL provides the FAF network as a TransCAD network, and also as a shapefile
	# that I think is probably exported from the TransCAD network. Kyle Ward in the
	# Raleigh office tried to export an xml file from TransCAD, but it wasn't quite
	# right. So I decided to write this script to make an xml MATSim network from
	# the shapefile.

	# The networkx library contains general graph functionality, including the
	# ability to read a line shapefile as a graph. I tested its capability by
	# exporting the network as a line shapefile of edges and a point shapefile of
	# nodes, and was satisfied.
	import networkx as nx

	# To write the graph to xml, we use commands in the lxml library; we need to use
	# this instead of the ElementTree library that comes standard with Python
	# installations so that we can `pretty_print` the xml output.
	import lxml.etree as ET


	# When we read in G, the node labels are the coordinate tuples, e.g.
	# (-111.458149, 38.108458). This isn't very useful, so we change them into
	# integers beginning at 0. Also, MATSim likes to use coordinates in meters, so
	# we use a version of the network that we exported into EPSG: 2818, a Lambert
	# Conformal Conic projection centered on Montana. This projection gives
	# coordinates in meters, and has the added LCC feature of minimizing distance
	# distortion across a wide area of the map.
	G = nx.read_shp("fafnetworkLCC.shp")

	start = 0
	G = nx.convert_node_labels_to_integers(G, first_label=start,
	label_attribute = "coord")


	# MATSIM NETWORK BUILDER
	# =============================================

	# We need to build an element tree to store the node and edge attributes. We
	# follow the [MATSim network specification](http://www.matsim.org/files/dtd/network_v1.dtd),
	# as closely as possible; this calls for an xml structure
	#
	# <network><nodes><node ...></nodes><links><link ....></links></network>.
	#
	# So in the element tree, the parent element is `network`, with two children,
	# `links` and `nodes`. The number of individual `node` and `link` children is
	# dependent on the size of the network.
	# In general we follow the
	# [MATSim network specification](http://www.matsim.org/files/dtd/network_v1.dtd)

	network = ET.Element("network",
	attrib={'name':"MATSim network exported from FAF shapefile."})


	# Node structure
	# ----------------------------------------------
	nodes = ET.SubElement(network, "nodes")

	# Get node attributes by looping through all the nodes in graph. We can pull the
	# id and coordinate attributes from the node attributes. The MATSim
	# specification allows for a "type" attribute to distinguish between networks.
	# I don't know that this is strictly necessary, but the example network uses "2"
	# for all nodes, so I'll continue that convention to limit as many differences
	# as possible.
	#
	# <node id, x, y, type = "2")
	for i in range(len(G)):
	ET.SubElement(nodes, "node",
	attrib={'id': str(G.nodes()[i]),
	'x':str(G.node[i]['coord'][0]),
	'y':str(G.node[i]['coord'][1]),
	'type':"2"})


	# Link structure
	# -------------------------------------------
	# There are a number of general link attributes that we need to set at the
	# parent element. The specification says defaults are available, but we will set
	# them anyways. The period over which link capacity is recorded `capperiod` is
	# one hour, the effective length of vehicles `effectivecellcize` is 7.5 meters
	# (we might want to change this, because we're dealing with trucks?), and the
	# effective lane width is 3.75 meters
	links = ET.SubElement(network, "links",
	attrib={'capperiod': "01:00:00",
	'effectivecellsize': "7.5",
	'effectivelanewidth': "3.75"})



	# The MATSim network specification has a number of required fields and some that
	# are not required. The required links are the link ID, the "from" node ID, the
	# "to" node ID, and the distance, or the "length". The FAF network gives the
	# distance in miles, but the network wants meters, so we need to apply a
	# conversion factor.

	# The `capacity` attribute is not listed as mandatory, but we needed to set the
	# capperiod, so I'm worried that this is actually necessary. For now we'll set
	# a constant value at 2000 veh/hrln * 3 lanes = 6000 veh/hr. We might be able to
	# set this more precisely using the `SIGNT1` network attribute that determines
	# whether a road is an interstate, state highway, etc.

	# Other optional attributes include free flow speed, permanent lanes, whether
	# the link is one-way (see discussion of reversed links below), the modes,
	# permitted on the link (I don't know how this is supposed to tie into the node
	# type above). We also append an additional attribute GEOID, constructed from
	# the state and county FIPS codes. This may prove useful.

	# To simplify the code, we want to extract the edge attributes into a form where
	# we can get them as variable[(start,end)]
	length = nx.get_edge_attributes(G, "MILES")
	idvar = nx.get_edge_attributes(G, "ID")
	ctfips = nx.get_edge_attributes(G, "CTFIPS")
	stfips = nx.get_edge_attributes(G, "STFIPS")
	fclass = nx.get_edge_attributes(G, "SIGNT1")

	for i in range(len(G.edges())):
	startnode = G.edges()[i][0]
	endnode = G.edges()[i][1]
	# create link SubElement with attributes pulled from the lists you got above.
	ET.SubElement(links, "link", attrib={
	'id': str(idvar[(startnode, endnode)]),
	'from': str(startnode),
	'to': str(endnode),
	'capacity': str(6000),
	'modes': "car",
	'oneway': str(1),
	'type': str(10),
	'length': str(length[(startnode, endnode)] * 1609.34), # convert to meters
	'GEOID': str(stfips[(startnode, endnode)]).zfill(2) + str(ctfips[(startnode, endnode)]).zfill(3)})


	# One extra item I noted in the MATSim documentation is that all links
	# must be one-way. So we need to create all of the same links again, but
	# with start and end nodes swapped. Also, we'll have to make a new link id.
	# Because there are about ~180,000 links in the network if we add 1e6 to each of
	# the IDs, we won't duplicate any labels. Also, link `2183` will have the
	# ID `1002183`, for its reverse partner, which might be nice.
	for i in range(len(G.edges())):
	startnode = G.edges()[i][0]
	endnode = G.edges()[i][1]
	ET.SubElement(links, "link", attrib={
	'id': str(int(idvar[(startnode, endnode)] + 1e6)), #create unique id as above
	'from': str(endnode), #swap to and from node ids
	'to': str(startnode),
	'capacity': str(6000),
	'modes': "car",
	'oneway': str(1),
	'type': str(10),
	'length': str(length[(startnode, endnode)] * 1609.34), # convert to meters
	'GEOID': str(stfips[(startnode, endnode)]).zfill(2) + str(ctfips[(startnode, endnode)]).zfill(3)})



	# We can now wrap the entire element tree and write it to an xml file with the
	# same headers that came in the original file.
	tree = ET.ElementTree(network)

	with open('network.xml', 'w') as f:
	f.write("""<?xml version="1.0" encoding="UTF-8" ?>
	<!DOCTYPE network SYSTEM "http://www.matsim.org/files/dtd/network_v1.dtd">
	""")
	tree.write(f, pretty_print = True)