Created
May 8, 2015 06:06
-
-
Save renjiexu/369d5a81bd8700490fbe to your computer and use it in GitHub Desktop.
Caltrain schedule parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def getRouteNumber(lineNumber):
    """Return the route name whose column collection contains ``lineNumber``.

    The lookup goes through the module-level ``colToRoute`` mapping. The
    first key whose collection contains the value is returned; when no
    collection contains it, the string "not found" is returned instead.
    """
    matches = (route for route, members in colToRoute.items()
               if lineNumber in members)
    return next(matches, "not found")
def amOrPm(row, col):
    """Return the suffix ("AM" or "PM") for the cell at ``row``/``col``.

    Every column after 22 is "PM". Column 22 itself is "PM" only for rows
    after 20. All remaining cells are "AM".
    """
    if col > 22:
        return "PM"
    if col == 22 and row > 20:
        return "PM"
    return "AM"
# Input table; the handle is consumed further down, where the lines are read.
f = open('nb_weekday', 'r')

# Header lookups in both directions: column index <-> train number.
colToTrainNumber = dict()
trainNumberToCol = dict()

# Created empty here; not referenced again in the visible part of the script.
stopNameToTimeslot = dict()
# Stop name -> stop code string. The output loop prints the code as "stopID".
trainStopCode = dict([
    ("San Francisco", "70011"),
    ("22nd Street", "70021"),
    ("Bayshore", "70031"),
    ("So. San Francisco", "70041"),
    ("San Bruno", "70051"),
    ("Millbrae", "70061"),
    ("Burlingame", "70081"),
    ("San Mateo", "70091"),
    ("Hayward Park", "70101"),
    ("Hillsdale", "70111"),
    ("Belmont", "70121"),
    ("San Carlos", "70131"),
    ("Redwood City", "70141"),
    ("Menlo Park", "70161"),
    ("Palo Alto", "70171"),
    ("California Ave", "70191"),
    ("San Antonio", "70201"),
    ("Mountain View", "70211"),
    ("Sunnyvale", "70221"),
    ("Lawrence", "70231"),
    ("Santa Clara", "70241"),
    ("College Park", "70251"),
    ("San Jose Diridon", "70261"),
    ("Tamien", "70271"),
    ("Capitol", "70281"),
    ("Blossom Hill", "70291"),
    ("Morgan Hill", "70301"),
    ("San Martin", "70311"),
    ("Gilroy", "70321"),
])
# Route name -> set of column indexes that belong to that route.
colToRoute = {}
colToRoute["LOCAL"] = {2, 3, 19, 21, 22, 24, 25, 26, 28, 44, 45, 46, 47, 48}
colToRoute["LIMITED"] = {
    5, 7, 9, 10, 12, 14, 15, 17, 18, 20, 27,
    29, 30, 32, 33, 35, 37, 38, 40, 42, 43,
}
colToRoute["BABY BULLET"] = {4, 6, 8, 11, 13, 16, 31, 34, 36, 39, 41}
# Pull the whole table into memory, then release the file handle right away
# so it is closed even if reading fails.
try:
    lines = f.readlines()
finally:
    f.close()

# The first line is the header row. Starting at index 2, every tab-separated
# field is recorded as a train number, indexed in both directions.
# NOTE(review): the line is split without stripping, so the last header
# field still carries its trailing newline.
firstLine = lines[0]
cols = firstLine.split('\t')
for colNumber in range(2, len(cols)):
    colToTrainNumber[colNumber] = cols[colNumber]
    trainNumberToCol[cols[colNumber]] = colNumber
# Parse every line after the header into per-stop schedule entries.
result = {}                # stop name -> list of {train number: time} dicts
lineNumberToStopName = []  # stop names in the order their rows were read
nameToZone = {}            # stop name -> first field of its row

for lineNumber in range(1, len(lines)):
    line = lines[lineNumber]
    tokens = line.split('\t')
    zoneNumber, stopName = tokens[0], tokens[1]
    nameToZone[stopName] = zoneNumber

    schedules = []
    # Fields from index 2 up to (but excluding) the last two are time cells;
    # column 23 is skipped outright.
    for col in range(2, len(tokens) - 2):
        if col == 23:
            continue
        timeslot = tokens[col].rstrip()
        # Ignore blank cells and the placeholder cell: a space followed by
        # the UTF-8 bytes of an em dash.
        if not timeslot or timeslot == ' \xe2\x80\x94':
            continue
        schedules.append(
            {colToTrainNumber[col]: timeslot + amOrPm(lineNumber, col)})

    lineNumberToStopName.append(stopName)
    result[stopName] = schedules
# Emit one object literal per stop, in the order the stops were read.
# The text for a stop is assembled first and written with one print call.
for stopName in lineNumberToStopName:
    stopHeader = '{"name":"%s zone-%s", "stopID":"%s", "children":[' % (
        stopName, nameToZone[stopName], trainStopCode[stopName])

    childGroups = []
    for schedule in result[stopName]:
        entries = []
        for trainNumber, timeslot in schedule.items():
            routeName = getRouteNumber(trainNumberToCol[trainNumber])
            entry = '{"name":"%s", "trainNo":"%s", "route":"%s"}' % (
                timeslot, trainNumber, routeName)
            entries.append(entry + '\n')
        childGroups.append(''.join(entries))

    # Layout: header, one space, each entry on its own line with a line
    # holding only "," between consecutive schedules, then the closing "]},".
    print(stopHeader + ' ' + ',\n'.join(childGroups) + ']},')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment