Skip to content

Instantly share code, notes, and snippets.

@garrettdashnelson
Created April 8, 2016 19:57
Show Gist options
  • Save garrettdashnelson/1c1e2b16c62f58acdda5ba6baf3375c1 to your computer and use it in GitHub Desktop.
'''
This is operating on a SQLite file which was generated using the following command to filter out entries with the same origin and destination
ogr2ogr -f SQLite -sql "SELECT * FROM usa_ttw_master WHERE NOT (O_STATEFP = D_STATEFP AND O_COUNTYFP = D_COUNTYFP AND O_TRACTCE = D_TRACTCE)" commutes_without_sameorigins.sqlite *.shp
The result of that command is a database with 4080575 entries. The original Shapefile had 4156426 entries, so 75851 same-origin entries were filtered out.
'''
from __future__ import print_function
import sqlite3
import time
startTime = time.time()  # wall-clock start; elapsed seconds printed at end of script
# source database
sourceDatabaseFile = 'commutes_without_sameorigins.sqlite'
# the database we will generate with a unique entry for each node
nodesDatabaseFile = 'fips_table.sqlite'
# the Pajek file that will get fed to Combo
pajekFile = 'commutes.net'
# setup the connection to the nodes database file
nodesConnection = sqlite3.connect(nodesDatabaseFile)
nodesCursor = nodesConnection.cursor()
# start from a clean slate: wipe any rows left over from a previous run
# (assumes the fips_table table/schema already exists in fips_table.sqlite)
nodesCursor.execute('delete from fips_table')
# setup the connection to the source database file
sourceConnection = sqlite3.connect(sourceDatabaseFile)
sourceCursor = sourceConnection.cursor()
# select from the source and concatenate STATEFP, COUNTYFP, and TRACTCE to create a unique id for both origin and destination
# est_com is the commute weight; origin/dest are the concatenated 11-char tract FIPS ids
sourceSql = "select est_com, (O_STATEFP || O_COUNTYFP || O_TRACTCE) as origin, (D_STATEFP || D_COUNTYFP || D_TRACTCE) as dest, o_latitude, o_longitud, d_latitude, d_longitud from usa_ttw_master"
sourceResult = sourceCursor.execute(sourceSql)
# in-memory registry of FIPS ids already assigned a serial number;
# list position + 1 is the Pajek vertex id (see getFipsSerialId below)
serializedFips = []
# function to get serialized id number for fips, and build a database of same
def getFipsSerialId(fips, lat, lng):
    """Return the 1-based Pajek vertex id for a FIPS tract code.

    On first sight of *fips*, append it to the module-level ``serializedFips``
    registry and insert a row into ``fips_table`` via ``nodesCursor``; the new
    serial id is the registry length. On subsequent calls return the id that
    was assigned the first time.

    Parameters: fips (str) concatenated STATE+COUNTY+TRACT code; lat, lng
    (float) coordinates of the tract.
    """
    if fips not in serializedFips:
        serializedFips.append(fips)
        # Parameterized query replaces the original "%s, %f, %f" string
        # formatting, which emitted FIPS codes as unquoted numeric literals
        # (dropping leading zeros, e.g. Connecticut '09001...') and truncated
        # lat/lng to 6 decimal places.
        nodesCursor.execute('insert into fips_table (fips,lat,lng) values (?, ?, ?)', (fips, lat, lng))
        return len(serializedFips)
    else:
        # NOTE(review): linear scan + .index() make this O(n) per lookup over
        # ~4M rows; a dict {fips: serial} would be O(1), but serializedFips is
        # defined at module level so its type is left unchanged here.
        return serializedFips.index(fips) + 1
# Write every commute as a Pajek *Arcs entry into a temporary file.
# (Held open in w+ mode: it is re-read later when the final file is assembled.)
arcTmp = open('arctmp.tmp', 'w+')
arcTmp.write('*Arcs\n')
# Walk the source rows, assigning serial vertex ids on the fly.
for i, row in enumerate(sourceResult, start=1):
    # for debug throttling
    # if i > 10: break
    origId = getFipsSerialId(row[1], row[3], row[4])
    destId = getFipsSerialId(row[2], row[5], row[6])
    # one arc line: " origin destination weight"
    arcTmp.write(' {} {} {}\n'.format(origId, destId, row[0]))
    # periodically persist the nodes table and report progress in-place
    if i % 10000 == 0:
        nodesConnection.commit()
        print(str(i), end='\r')
# create a temporary file to hold the vertices
verticesTmp = open('verttmp.tmp', 'w+')
verticesTmp.write('*Vertices\n')  # manually add number in later
# finally, go back and look through all the vertices we created:
# one Pajek vertex line per fips_table row, quoting the FIPS code as the label
nodesResult = nodesCursor.execute('select serial_id, fips from fips_table')
for row in nodesResult:
    verticesTmp.write(' ' + str(row[0]) + ' "' + str(row[1]) + '"\n')
# now join those two temporary files into the Pajek file:
# vertices section first, then the arcs written earlier
outFile = open(pajekFile, 'w')
verticesTmp.seek(0)
outFile.write(verticesTmp.read())
arcTmp.seek(0)
outFile.write(arcTmp.read())
# Close all file handles explicitly — the original left them open, so the
# Pajek output was only flushed whenever the interpreter happened to exit.
outFile.close()
verticesTmp.close()
arcTmp.close()
# flush the last partial batch of node inserts, then release both databases
nodesConnection.commit()
nodesConnection.close()
sourceConnection.close()
# elapsed wall-clock seconds for the whole run
print(time.time() - startTime)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment