Skip to content

Instantly share code, notes, and snippets.

@garrettdashnelson
Created April 8, 2016 19:57
Show Gist options
  • Save garrettdashnelson/1c1e2b16c62f58acdda5ba6baf3375c1 to your computer and use it in GitHub Desktop.
'''
This is operating on a SQLite file which was generated using the following command to filter out entries with the same origin and destination
ogr2ogr -f SQLite -sql "SELECT * FROM usa_ttw_master WHERE NOT (O_STATEFP = D_STATEFP AND O_COUNTYFP = D_COUNTYFP AND O_TRACTCE = D_TRACTCE)" commutes_without_sameorigins.sqlite *.shp
The result of that command is a database with 4080575 entries. The original Shapefile had 4156426 entries, so 75851 same-origin entries were filtered out.
'''
from __future__ import print_function
import sqlite3
import time
startTime = time.time()  # wall-clock start; elapsed seconds printed at end of script
# source database
sourceDatabaseFile = 'commutes_without_sameorigins.sqlite'
# the database we will generate with a unique entry for each node
nodesDatabaseFile = 'fips_table.sqlite'
# the Pajek file that will get fed to Combo
pajekFile = 'commutes.net'
# setup the connection to the nodes database file
nodesConnection = sqlite3.connect(nodesDatabaseFile)
nodesCursor = nodesConnection.cursor()
# start from a clean slate: wipe any rows left over from a previous run
# (assumes the fips_table table/schema already exists in fips_table.sqlite)
nodesCursor.execute('delete from fips_table')
# setup the connection to the source database file
sourceConnection = sqlite3.connect(sourceDatabaseFile)
sourceCursor = sourceConnection.cursor()
# select from the source and concatenate STATEFP, COUNTYFP, and TRACTCE to create a unique id for both origin and destination
# est_com is the commute weight; origin/dest are the concatenated 11-char tract FIPS ids
sourceSql = "select est_com, (O_STATEFP || O_COUNTYFP || O_TRACTCE) as origin, (D_STATEFP || D_COUNTYFP || D_TRACTCE) as dest, o_latitude, o_longitud, d_latitude, d_longitud from usa_ttw_master"
sourceResult = sourceCursor.execute(sourceSql)
# in-memory registry of FIPS ids already assigned a serial number;
# list position + 1 is the Pajek vertex id (see getFipsSerialId below)
serializedFips = []
# function to get serialized id number for fips, and build a database of same
def getFipsSerialId(fips, lat, lng):
    """Return the 1-based Pajek vertex id for a FIPS tract code.

    On first sight of *fips*, append it to the module-level ``serializedFips``
    registry and insert a row into ``fips_table`` via ``nodesCursor``; the new
    serial id is the registry length. On subsequent calls return the id that
    was assigned the first time.

    Parameters: fips (str) concatenated STATE+COUNTY+TRACT code; lat, lng
    (float) coordinates of the tract.
    """
    if fips not in serializedFips:
        serializedFips.append(fips)
        # Parameterized query replaces the original "%s, %f, %f" string
        # formatting, which emitted FIPS codes as unquoted numeric literals
        # (dropping leading zeros, e.g. Connecticut '09001...') and truncated
        # lat/lng to 6 decimal places.
        nodesCursor.execute('insert into fips_table (fips,lat,lng) values (?, ?, ?)', (fips, lat, lng))
        return len(serializedFips)
    else:
        # NOTE(review): linear scan + .index() make this O(n) per lookup over
        # ~4M rows; a dict {fips: serial} would be O(1), but serializedFips is
        # defined at module level so its type is left unchanged here.
        return serializedFips.index(fips) + 1
# Write every commute as a Pajek *Arcs entry into a temporary file.
# (Held open in w+ mode: it is re-read later when the final file is assembled.)
arcTmp = open('arctmp.tmp', 'w+')
arcTmp.write('*Arcs\n')
# Walk the source rows, assigning serial vertex ids on the fly.
for i, row in enumerate(sourceResult, start=1):
    # for debug throttling
    # if i > 10: break
    origId = getFipsSerialId(row[1], row[3], row[4])
    destId = getFipsSerialId(row[2], row[5], row[6])
    # one arc line: " origin destination weight"
    arcTmp.write(' {} {} {}\n'.format(origId, destId, row[0]))
    # periodically persist the nodes table and report progress in-place
    if i % 10000 == 0:
        nodesConnection.commit()
        print(str(i), end='\r')
# create a temporary file to hold the vertices
verticesTmp = open('verttmp.tmp', 'w+')
verticesTmp.write('*Vertices\n')  # manually add number in later
# finally, go back and look through all the vertices we created:
# one Pajek vertex line per fips_table row, quoting the FIPS code as the label
nodesResult = nodesCursor.execute('select serial_id, fips from fips_table')
for row in nodesResult:
    verticesTmp.write(' ' + str(row[0]) + ' "' + str(row[1]) + '"\n')
# now join those two temporary files into the Pajek file:
# vertices section first, then the arcs written earlier
outFile = open(pajekFile, 'w')
verticesTmp.seek(0)
outFile.write(verticesTmp.read())
arcTmp.seek(0)
outFile.write(arcTmp.read())
# Close all file handles explicitly — the original left them open, so the
# Pajek output was only flushed whenever the interpreter happened to exit.
outFile.close()
verticesTmp.close()
arcTmp.close()
# flush the last partial batch of node inserts, then release both databases
nodesConnection.commit()
nodesConnection.close()
sourceConnection.close()
# elapsed wall-clock seconds for the whole run
print(time.time() - startTime)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment