Created
April 8, 2016 19:57
-
-
Save garrettdashnelson/1c1e2b16c62f58acdda5ba6baf3375c1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
This is operating on a SQLite file which was generated using the following command to filter out entries with the same origin and destination:

ogr2ogr -f SQLite -sql "SELECT * FROM usa_ttw_master WHERE NOT (O_STATEFP = D_STATEFP AND O_COUNTYFP = D_COUNTYFP AND O_TRACTCE = D_TRACTCE)" commutes_without_sameorigins.sqlite *.shp

The result of that command is a database with 4080575 entries. The original Shapefile had 4156426 entries, so 75851 same-origin entries were filtered out.
'''
from __future__ import print_function

import shutil
import sqlite3
import time
startTime = time.time()

# Input: the ogr2ogr-filtered commute database described in the header docstring.
sourceDatabaseFile = 'commutes_without_sameorigins.sqlite'
# Output: a database that will receive one row per unique node (census tract).
nodesDatabaseFile = 'fips_table.sqlite'
# Output: the Pajek network file that will get fed to Combo.
pajekFile = 'commutes.net'

# Open the source database and pull every commute record, concatenating
# STATEFP || COUNTYFP || TRACTCE into a single unique tract id for both
# the origin and the destination of each record.
sourceConnection = sqlite3.connect(sourceDatabaseFile)
sourceCursor = sourceConnection.cursor()
sourceSql = "select est_com, (O_STATEFP || O_COUNTYFP || O_TRACTCE) as origin, (D_STATEFP || D_COUNTYFP || D_TRACTCE) as dest, o_latitude, o_longitud, d_latitude, d_longitud from usa_ttw_master"
sourceResult = sourceCursor.execute(sourceSql)

# Open the nodes database and clear out any rows left from a previous run.
nodesConnection = sqlite3.connect(nodesDatabaseFile)
nodesCursor = nodesConnection.cursor()
nodesCursor.execute('delete from fips_table')
# Map from concatenated FIPS code -> 1-based serial id.  A dict gives O(1)
# lookup; the original list-based membership test plus .index() was O(n)
# per row, i.e. O(n^2) over the ~4 million commute records.
serializedFips = {}

def getFipsSerialId(fips, lat, lng):
    """Return the 1-based serial id for a concatenated FIPS code.

    The first time a code is seen it is assigned the next serial id and a
    (fips, lat, lng) row is inserted into fips_table via the module-level
    nodesCursor; subsequent calls return the cached id.
    """
    serialId = serializedFips.get(fips)
    if serialId is None:
        serialId = len(serializedFips) + 1
        serializedFips[fips] = serialId
        # Parameterized query: the original %-interpolation emitted the FIPS
        # string unquoted (dropping leading zeros) and %f truncated the
        # coordinates to six decimal places.
        nodesCursor.execute(
            'insert into fips_table (fips,lat,lng) values (?, ?, ?)',
            (fips, lat, lng))
    return serialId
# Temporary file to accumulate the *Arcs section of the Pajek file.
# Opened 'w+' so it can be rewound and re-read during final assembly.
arcTmp = open('arctmp.tmp', 'w+')
arcTmp.write('*Arcs\n')

# Begin building the list of arcs (commutes) here.  enumerate() replaces
# the original manual counter; i is also used to batch database commits.
i = 0  # stays 0 if the source query returned no rows
for i, row in enumerate(sourceResult, start=1):
    # for debug throttling
    # if i > 10: break
    origId = getFipsSerialId(row[1], row[3], row[4])
    destId = getFipsSerialId(row[2], row[5], row[6])
    arcTmp.write(' %s %s %s\n' % (origId, destId, row[0]))
    if i % 10000 == 0:
        # flush the fips_table inserts in batches and show progress
        nodesConnection.commit()
        print(i, end='\r')
# Temporary file to accumulate the *Vertices section of the Pajek file,
# rewound later when the sections are stitched together.
verticesTmp = open('verttmp.tmp', 'w+')
verticesTmp.write('*Vertices\n')  # manually add number in later

# Emit one quoted Pajek vertex line for every node recorded above.
nodesResult = nodesCursor.execute('select serial_id, fips from fips_table')
for row in nodesResult:
    verticesTmp.write(' %s "%s"\n' % (row[0], row[1]))
# Join the two temporary sections into the final Pajek file, streaming them
# with copyfileobj instead of loading each whole file into memory.  The
# original never closed outFile (or the temp files), relying on interpreter
# exit to flush the output; the with-block guarantees the flush.
with open(pajekFile, 'w') as outFile:
    verticesTmp.seek(0)
    shutil.copyfileobj(verticesTmp, outFile)
    arcTmp.seek(0)
    shutil.copyfileobj(arcTmp, outFile)

# Close the temp files (left on disk for inspection).
verticesTmp.close()
arcTmp.close()

# Persist any inserts made since the last batched commit, then shut down.
nodesConnection.commit()
nodesConnection.close()
sourceConnection.close()
print(time.time() - startTime)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment