Created
March 26, 2015 16:15
-
-
Save cds-amal/0f5a26f5ccd60e583300 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 157, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import re\n", | |
"from bs4 import BeautifulSoup as Soup\n", | |
"import pprint" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Delays\n", | |
" 02/22/2015 8:41AM \n", | |
"Due to a train with mechanical problems at 49 St, \n", | |
"southbound [Q] trains are running with delays.\n", | |
"Allow additional travel time.\n" | |
] | |
} | |
], | |
"source": [ | |
"fragment_good = '''<span class=\"TitleDelay\">Delays</span>\n", | |
"<span class=\"DateStyle\"> 02/22/2015 8:41AM </span>\n", | |
"<p>Due to a train with mechanical problems at <strong>49 St</strong>, \n", | |
"southbound [Q] trains are running with delays.</p>\n", | |
"<p>Allow additional travel time.</p>'''\n", | |
"\n", | |
"# Sample notice whose body is wrapped in <p> tags.\n", | |
"# The parser is named explicitly: without it bs4 picks whichever parser is\n", | |
"# installed, so the parse tree can differ from one machine to the next.\n", | |
"good_text = Soup(fragment_good, 'html.parser').get_text()\n", | |
"print(good_text)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 87, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Delays\n", | |
"Posted: 03/16/2015 8:15AM\n", | |
"Following an earlier incident at E 180 St, \n", | |
"[2] and [5] train service has resumed with residual delays.\n" | |
] | |
} | |
], | |
"source": [ | |
"fragment_bad = '''<span class=\"TitleDelay\">Delays</span>\n", | |
"<span class=\"DateStyle\">Posted: 03/16/2015 8:15AM</span>\n", | |
"Following an earlier incident at <STRONG>E 180 St</STRONG>, \n", | |
"[2] and [5] train service has resumed with residual delays.'''\n", | |
"\n", | |
"# Sample notice whose body text is NOT wrapped in <p> tags and whose date\n", | |
"# carries a 'Posted:' prefix.\n", | |
"# The parser is named explicitly: without it bs4 picks whichever parser is\n", | |
"# installed, so the parse tree can differ from one machine to the next.\n", | |
"bad_text = Soup(fragment_bad, 'html.parser').get_text()\n", | |
"print(bad_text)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 88, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Naive notice splitter with three named groups:\n", | |
"#   notice   - everything before the date stamp\n", | |
"#   datetime - a run of digits, '/', ':' and whitespace ending in AM or PM\n", | |
"#   info     - everything after the date stamp\n", | |
"# The pattern is a raw string so the regex escapes for digits and whitespace\n", | |
"# reach the regex engine verbatim instead of being read as string escapes.\n", | |
"# DOTALL lets '.' cross newlines, so the greedy leading '.+' settles on the\n", | |
"# LAST date stamp when the text holds several notices.\n", | |
"rex = re.compile(\n", | |
"    r'(?P<notice>.+)(?=\\d\\d.\\d\\d.\\d\\d)(?P<datetime>[\\d/:\\s]+(AM|PM))(?P<info>.+)$',\n", | |
"    re.IGNORECASE | re.MULTILINE | re.DOTALL,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 89, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/16/2015 8:15AM',\n", | |
" 'info': '\\n'\n", | |
" 'Following an earlier incident at E 180 St, \\n'\n", | |
" '[2] and [5] train service has resumed with residual delays.',\n", | |
" 'notice': 'Delays\\nPosted: '}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Split the unwrapped sample notice into its named regex groups and show them.\n", | |
"bad_match = rex.match(bad_text)\n", | |
"pprint.pprint(bad_match.groupdict())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 90, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '02/22/2015 8:41AM',\n", | |
" 'info': ' \\n'\n", | |
" 'Due to a train with mechanical problems at 49 St, \\n'\n", | |
" 'southbound [Q] trains are running with delays.\\n'\n", | |
" 'Allow additional travel time.',\n", | |
" 'notice': 'Delays\\n '}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Split the <p>-wrapped sample notice into its named regex groups and show them.\n", | |
"good_match = rex.match(good_text)\n", | |
"pprint.pprint(good_match.groupdict())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from IPython.display import HTML, display\n", | |
"from urllib.request import urlopen\n", | |
"\n", | |
"# Download the raw service-status feed as bytes.\n", | |
"# read() is used because the response object has no readall() on current\n", | |
"# Python 3 releases; the with-block closes the connection after the read.\n", | |
"with urlopen(\"http://web.mta.info/status/serviceStatus.txt\") as response:\n", | |
"    html = response.read()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 92, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Parse the downloaded feed. The parser is named explicitly so the resulting\n", | |
"# tree does not depend on which optional parsers happen to be installed.\n", | |
"soup = Soup(html, 'html.parser')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 93, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Feed-level metadata: text of the first <timestamp> and <responsecode>\n", | |
"# tags found under <service>.\n", | |
"service_root = soup.service\n", | |
"timestamp = service_root.find('timestamp').get_text()\n", | |
"responsecode = service_root.find('responsecode').get_text()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# What services exist?\n", | |
"\n", | |
"* subway\n", | |
"* bus\n", | |
"* bt\n", | |
"    * bridges and tunnels (MTA Bridges and Tunnels)\n", | |
"* lirr\n", | |
"* metronorth" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 95, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"None\n", | |
"responsecode\n", | |
"None\n", | |
"timestamp\n", | |
"None\n", | |
"subway\n", | |
"None\n", | |
"bus\n", | |
"None\n", | |
"bt\n", | |
"None\n", | |
"lirr\n", | |
"None\n", | |
"metronorth\n", | |
"None\n" | |
] | |
} | |
], | |
"source": [ | |
"# List the name of every direct child of <service>; bare text nodes between\n", | |
"# the tags have no tag name and print as None.\n", | |
"for child in soup.service.contents:\n", | |
"    print(child.name)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 104, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['subway', 'bus', 'bt', 'lirr', 'metronorth']" | |
] | |
}, | |
"execution_count": 104, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Names of the service sections: every named child of <service> except the\n", | |
"# two feed-metadata tags; unnamed text nodes are dropped.\n", | |
"services = [\n", | |
"    child.name\n", | |
"    for child in soup.service.children\n", | |
"    if child.name and child.name not in ['timestamp', 'responsecode']\n", | |
"]\n", | |
"services" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<H1>SUBWAY Status Information</H1>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': '123',\n", | |
" 'status': 'DELAYS',\n", | |
" 'text': 'Delays\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/201510:07AM\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' Following an earlier inicdent at 145 St, [1] train service has '\n", | |
" 'resumed with residual delays.\\r\\n'\n", | |
" ' ',\n", | |
" 'time': '10:07AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>NAIVE regex extraction attempt</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/26/201510:07AM',\n", | |
" 'info': '\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' Following an earlier inicdent at 145 St, [1] train service has '\n", | |
" 'resumed with residual delays.\\r\\n'\n", | |
" ' ',\n", | |
" 'notice': 'Delays\\n\\r\\n '}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitleDelay\">Delays</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 10:07AM\r\n", | |
" </span><br/><br/>\r\n", | |
" Following an earlier inicdent at <STRONG>145 St</STRONG>, [1] train service has resumed with residual delays.\r\n", | |
" <br/><br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '', 'name': '456', 'status': 'GOOD SERVICE', 'text': '', 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '', 'name': '7', 'status': 'GOOD SERVICE', 'text': '', 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '', 'name': 'ACE', 'status': 'GOOD SERVICE', 'text': '', 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': 'BDFM',\n", | |
" 'status': 'DELAYS',\n", | |
" 'text': 'Delays\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/201510:04AM\\r\\n'\n", | |
" ' \\n'\n", | |
" 'Due to signal problems at Avenue P, southbound [F] trains are '\n", | |
" 'running with delays.\\n'\n", | |
" 'Allow additional travel time.\\n',\n", | |
" 'time': '10:04AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>NAIVE regex extraction attempt</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/26/201510:04AM',\n", | |
" 'info': '\\r\\n'\n", | |
" ' \\n'\n", | |
" 'Due to signal problems at Avenue P, southbound [F] trains are '\n", | |
" 'running with delays.\\n'\n", | |
" 'Allow additional travel time.\\n',\n", | |
" 'notice': 'Delays\\n\\r\\n '}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitleDelay\">Delays</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 10:04AM\r\n", | |
" </span><br/><br/>\r\n", | |
" <P>Due to signal problems at <STRONG>Avenue P</STRONG>, southbound [F] trains are running with delays.</P>\r\n", | |
"<P>Allow additional travel time.</P>\r\n", | |
" <br/><br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '', 'name': 'G', 'status': 'GOOD SERVICE', 'text': '', 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '', 'name': 'JZ', 'status': 'GOOD SERVICE', 'text': '', 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '', 'name': 'L', 'status': 'GOOD SERVICE', 'text': '', 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': 'NQR',\n", | |
" 'status': 'PLANNED WORK',\n", | |
" 'text': 'Planned Work\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '[Q] No trains between 57 St-7 Av and Ditmars Blvd\\r\\n'\n", | |
" 'Days, 10 AM to 3 PM, Tue to Fri, Mar 24 - 27\\r\\n'\n", | |
" '\\n'\n", | |
" '[Q] service operates between Stillwell Av and 57 St-7 Av.\\r\\n'\n", | |
" '\\n'\n", | |
" ' Take the [N]* instead. Transfer between trains at Times Sq-42 St '\n", | |
" '(across the platform).\\r\\n'\n", | |
" '\\n'\n", | |
" '*Please review [N] advisories for additional information that may '\n", | |
" 'affect your trip.\\r\\n'\n", | |
" '\\n'\n", | |
" 'Alternate travel note:\\r\\n'\n", | |
" '[R] service is also available at 5 Av/59 St and Lexington Av/59 '\n", | |
" 'St.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n',\n", | |
" 'time': '10:13AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitlePlannedWork\" >Planned Work</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(92497);>\r\n", | |
"<b>[Q] No trains between 57 St-7 Av and Ditmars Blvd\r\n", | |
"</a><br/><br/><div id= 92497 class=\"plannedWorkDetail\" ></b>Days, 10 AM to 3 PM, Tue to Fri, Mar 24 - 27\r\n", | |
"<br>\r\n", | |
"<br>[Q] service operates between <b>Stillwell Av</b> and <b>57 St-7 Av</b>.\r\n", | |
"<br>\r\n", | |
"<br>• Take the [N]* instead. Transfer between trains at Times Sq-42 St (across the platform).\r\n", | |
"<br>\r\n", | |
"<br></font><i>*Please review [N] advisories for additional information that may affect your trip.\r\n", | |
"<br>\r\n", | |
"<br><b>Alternate travel note:\r\n", | |
"<br></b></i>[R] service is also available at 5 Av/59 St and Lexington Av/59 St.\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/><br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '', 'name': 'S', 'status': 'GOOD SERVICE', 'text': '', 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '', 'name': 'SIR', 'status': 'GOOD SERVICE', 'text': '', 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<H1>BUS Status Information</H1>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': 'B1 - B84',\n", | |
" 'status': 'SERVICE CHANGE',\n", | |
" 'text': 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/201510:13AM\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B11 buses are detoured due to road work on 4 Avand 50 St. \\n'\n", | |
" 'Detour is as follows:\\n'\n", | |
" 'Eastbound: Via 52 St, left on 5 Av, right on 50 St and regular '\n", | |
" 'route.\\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Delays\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/201510:08AM\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B63 buses are running with delays in both directions, due to road '\n", | |
" 'work on 5 Av between 7 St and 8 St.\\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/201510:07AM\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B26 buses are detoured, due to milling on Halsey St between Howard '\n", | |
" 'Avand Patchen Av.\\n'\n", | |
" 'Detour is as follows;\\n'\n", | |
" 'Westbound: Via Halsey St, right on Howard Av, left on Hancock St, '\n", | |
" 'left on Reid Av, right on Halsey St and regular route.\\n'\n", | |
" 'Eastbound: Via Halsey St, left on Reid St, right on Jefferson Av, '\n", | |
" 'right on Saratoga Av, left on Halsey St and regular route.\\n'\n", | |
" 'Corresponding stops will be made along detour route.\\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B38/B38 LTD - Eastbound buses rerouted from Lafayette Av '\n", | |
" 'betweenStuyvesant Av and Malcolm X Blvd\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to construction, buses make corresponding stops along the '\n", | |
" 'detoured route.\\r\\n'\n", | |
" '\\n'\n", | |
" 'Show Reroute Details\\n'\n", | |
" 'Via Lafayette Av\\r\\n'\n", | |
" 'Right on Stuyvesant Av\\r\\n'\n", | |
" 'Left on Greene Av\\r\\n'\n", | |
" 'Left on Malcolm X Blvd\\r\\n'\n", | |
" 'Right on Lafayette Av then regular route\\n'\n", | |
" '\\n'\n", | |
" 'Reminder: B38 LTD buses do not operate overnight.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B24 and B46/B46 LTD buses detoured from the Entrance on Broadway '\n", | |
" 'at Washington Plz Terminal\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to construction, buses operate as follows:\\r\\n'\n", | |
" '\\n'\n", | |
" 'Via Broadway\\r\\n'\n", | |
" 'Right on Roebling St into Washington Plaza.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B20 and Q24 - Northbound/Westbound buses detoured from Van '\n", | |
" 'Sinderen Av at BroadwayJunction due to construction - No stops '\n", | |
" 'missed\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Show Reroute Details\\n'\n", | |
" 'Via Jamaica Av\\r\\n'\n", | |
" 'Into Fulton St\\r\\n'\n", | |
" 'Right on Eastern Pkwy\\r\\n'\n", | |
" 'Left on Broadway then regular route\\n'\n", | |
" '\\n'\n", | |
" 'Reminder: B20 buses do not operate overnight.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n',\n", | |
" 'time': '10:13AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>NAIVE regex extraction attempt</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/26/201510:07AM',\n", | |
" 'info': '\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B26 buses are detoured, due to milling on Halsey St between Howard '\n", | |
" 'Avand Patchen Av.\\n'\n", | |
" 'Detour is as follows;\\n'\n", | |
" 'Westbound: Via Halsey St, right on Howard Av, left on Hancock St, '\n", | |
" 'left on Reid Av, right on Halsey St and regular route.\\n'\n", | |
" 'Eastbound: Via Halsey St, left on Reid St, right on Jefferson Av, '\n", | |
" 'right on Saratoga Av, left on Halsey St and regular route.\\n'\n", | |
" 'Corresponding stops will be made along detour route.\\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B38/B38 LTD - Eastbound buses rerouted from Lafayette Av '\n", | |
" 'betweenStuyvesant Av and Malcolm X Blvd\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to construction, buses make corresponding stops along the '\n", | |
" 'detoured route.\\r\\n'\n", | |
" '\\n'\n", | |
" 'Show Reroute Details\\n'\n", | |
" 'Via Lafayette Av\\r\\n'\n", | |
" 'Right on Stuyvesant Av\\r\\n'\n", | |
" 'Left on Greene Av\\r\\n'\n", | |
" 'Left on Malcolm X Blvd\\r\\n'\n", | |
" 'Right on Lafayette Av then regular route\\n'\n", | |
" '\\n'\n", | |
" 'Reminder: B38 LTD buses do not operate overnight.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B24 and B46/B46 LTD buses detoured from the Entrance on Broadway '\n", | |
" 'at Washington Plz Terminal\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to construction, buses operate as follows:\\r\\n'\n", | |
" '\\n'\n", | |
" 'Via Broadway\\r\\n'\n", | |
" 'Right on Roebling St into Washington Plaza.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B20 and Q24 - Northbound/Westbound buses detoured from Van '\n", | |
" 'Sinderen Av at BroadwayJunction due to construction - No stops '\n", | |
" 'missed\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Show Reroute Details\\n'\n", | |
" 'Via Jamaica Av\\r\\n'\n", | |
" 'Into Fulton St\\r\\n'\n", | |
" 'Right on Eastern Pkwy\\r\\n'\n", | |
" 'Left on Broadway then regular route\\n'\n", | |
" '\\n'\n", | |
" 'Reminder: B20 buses do not operate overnight.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n',\n", | |
" 'notice': 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/201510:13AM\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B11 buses are detoured due to road work on 4 Avand 50 St. \\n'\n", | |
" 'Detour is as follows:\\n'\n", | |
" 'Eastbound: Via 52 St, left on 5 Av, right on 50 St and regular '\n", | |
" 'route.\\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Delays\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/201510:08AM\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B63 buses are running with delays in both directions, due to '\n", | |
" 'road work on 5 Av between 7 St and 8 St.\\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' '}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitleServiceChange\" >Service Change</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 10:13AM\r\n", | |
" </span>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" <P><STRONG>B11</STRONG> buses are detoured due to road work on 4 Av and 50 St. </P>\r\n", | |
"<P>Detour is as follows:</P>\r\n", | |
"<P><STRONG>Eastbound</STRONG>: Via 52 St, left on 5 Av, right on 50 St and regular route.</P>\r\n", | |
"<P>Allow additional travel time.</P>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" \r\n", | |
" <span class=\"TitleDelay\">Delays</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 10:08AM\r\n", | |
" </span>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" <P><STRONG>B63</STRONG> buses are running with delays in both directions, due to road work on 5 Av between 7 St and 8 St.</P>\r\n", | |
"<P>Allow additional travel time.</P>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" \r\n", | |
" <span class=\"TitleServiceChange\" >Service Change</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 10:07AM\r\n", | |
" </span>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" <P><STRONG>B26</STRONG> buses are detoured, due to milling on Halsey St between Howard Av and Patchen Av.</P>\r\n", | |
"<P>Detour is as follows;</P>\r\n", | |
"<P><STRONG>Westbound</STRONG>: Via Halsey St, right on Howard Av, left on Hancock St, left on Reid Av, right on Halsey St and regular route.</P>\r\n", | |
"<P><STRONG>Eastbound</STRONG>: Via Halsey St, left on Reid St, right on Jefferson Av, right on Saratoga Av, left on Halsey St and regular route.</P>\r\n", | |
"<P>Corresponding stops will be made along detour route.</P>\r\n", | |
"<P>Allow additional travel time.</P>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" \r\n", | |
" <span class=\"TitlePlannedWork\" >Planned Detour</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(93500);>\r\n", | |
"<b>B38/B38</b> LTD<b> - Eastbound buses rerouted from Lafayette Av between<br clear= left>Stuyvesant Av and Malcolm X Blvd\r\n", | |
"</a><br/><br/><div id= 93500 class=\"plannedWorkDetail\" ></b>Until further notice\r\n", | |
"<br>\r\n", | |
"<br>Due to construction, buses make corresponding stops along the detoured route.\r\n", | |
"<br>\r\n", | |
"<br><b><i><a style=\"cursor:pointer; text-decoration:underline;\" onclick=ShowHide(935000);>Show Reroute Details</a></i></b>\r\n", | |
"<br><div id=\"935000\"; style=\"display:none;\">Via Lafayette Av\r\n", | |
"<br>Right on Stuyvesant Av\r\n", | |
"<br>Left on Greene Av\r\n", | |
"<br>Left on Malcolm X Blvd\r\n", | |
"<br>Right on Lafayette Av then regular route</div>\r\n", | |
"<br>\r\n", | |
"<br><b>Reminder</b>: <a href=http://web.mta.info/nyct/bus/schedule/bkln/b038cur.pdf target=_blank><font color=#0000FF><b><u>B38 LTD</u></b></font></a> buses do not operate overnight.\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" \r\n", | |
" <span class=\"TitlePlannedWork\" >Planned Detour</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(89552);>\r\n", | |
"<b>B24 and B46/B46 </b>LTD<b> buses detoured from the Entrance on Broadway at Washington Plz Terminal\r\n", | |
"</a><br/><br/><div id= 89552 class=\"plannedWorkDetail\" ></b>Until further notice\r\n", | |
"<br>\r\n", | |
"<br>Due to <i>c</i>onstruction, buses operate as follows:\r\n", | |
"<br>\r\n", | |
"<br>Via Broadway\r\n", | |
"<br>Right on Roebling St into Washington Plaza.\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" \r\n", | |
" <span class=\"TitlePlannedWork\" >Planned Detour</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(88492);>\r\n", | |
"<b>B20 and Q24 - Northbound/Westbound buses detoured from Van Sinderen Av at Broadway<br clear= left>Junction due to construction - No stops missed\r\n", | |
"</a><br/><br/><div id= 88492 class=\"plannedWorkDetail\" ></b>Until further notice\r\n", | |
"<br>\r\n", | |
"<br><b><i><a style=\"cursor:pointer; text-decoration:underline;\" onclick=ShowHide(884920);>Show Reroute Details</a></i></b>\r\n", | |
"<br><div id=\"884920\"; style=\"display:none;\">Via Jamaica Av\r\n", | |
"<br>Into Fulton St\r\n", | |
"<br>Right on Eastern Pkwy\r\n", | |
"<br>Left on Broadway then regular route</div>\r\n", | |
"<br>\r\n", | |
"<br><b>Reminder</b>: <a href=http://web.mta.info/nyct/bus/schedule/bkln/b020cur.pdf target=_blank><font color=#0000FF><b><u>B20</u></b></font></a> buses do not operate overnight.\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/>\r\n", | |
" <br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'B100 - B103',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'BM1 - BM5',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'BX1 - BX55',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'BXM1 - BXM18',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': 'M1 - M116',\n", | |
" 'status': 'DELAYS',\n", | |
" 'text': 'Delays\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/201510:10AM\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'M5 buses are running with delays inboth directions, due to heavy '\n", | |
" 'traffic on Broadway between Houston St and 34 St andon 6 Av '\n", | |
" 'between Leonard Stand Watts St. \\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'M15/M15 SBS - Northbound bus stop on 1 Av at 28 St temporarily '\n", | |
" 'closed\\r\\n'\n", | |
" 'Until Summer 2015\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to construction, please use nearby stops:\\r\\n'\n", | |
" '\\n'\n", | |
" 'M15: 1 Av at 25 St. \\r\\n'\n", | |
" 'M15 SBS: 1 Av at 24 St. \\r\\n'\n", | |
" '\\n'\n", | |
" 'Reminder: M15 SBS buses do not operate overnight.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'M15 - Northbound bus stop on 1 Av between E 86 St and E 87 St '\n", | |
" 'temporarily relocated\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to construction, please use the temporary stop on 1 Av at E 87 '\n", | |
" 'St.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n',\n", | |
" 'time': '10:10AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>NAIVE regex extraction attempt</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/26/201510:10AM',\n", | |
" 'info': '\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'M5 buses are running with delays inboth directions, due to heavy '\n", | |
" 'traffic on Broadway between Houston St and 34 St andon 6 Av '\n", | |
" 'between Leonard Stand Watts St. \\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'M15/M15 SBS - Northbound bus stop on 1 Av at 28 St temporarily '\n", | |
" 'closed\\r\\n'\n", | |
" 'Until Summer 2015\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to construction, please use nearby stops:\\r\\n'\n", | |
" '\\n'\n", | |
" 'M15: 1 Av at 25 St. \\r\\n'\n", | |
" 'M15 SBS: 1 Av at 24 St. \\r\\n'\n", | |
" '\\n'\n", | |
" 'Reminder: M15 SBS buses do not operate overnight.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'M15 - Northbound bus stop on 1 Av between E 86 St and E 87 St '\n", | |
" 'temporarily relocated\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to construction, please use the temporary stop on 1 Av at E 87 '\n", | |
" 'St.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n',\n", | |
" 'notice': 'Delays\\n\\r\\n '}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitleDelay\">Delays</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 10:10AM\r\n", | |
" </span>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" <P><STRONG>M5</STRONG> buses are running with delays in both directions, due to heavy traffic on Broadway between Houston St and 34 St and on 6 Av between Leonard St and Watts St. </P>\r\n", | |
"<P>Allow additional travel time.</P>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" \r\n", | |
" <span class=\"TitlePlannedWork\" >Planned Detour</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(94619);>\r\n", | |
"<b>M15/M15 SBS - Northbound bus stop on 1 Av at 28 St temporarily closed\r\n", | |
"</a><br/><br/><div id= 94619 class=\"plannedWorkDetail\" ></b>Until Summer 2015\r\n", | |
"<br>\r\n", | |
"<br>Due to construction, please use nearby stops:\r\n", | |
"<br>\r\n", | |
"<br><b>M15: </b> 1 Av<b> </b>at 25 St. \r\n", | |
"<br><b>M15 SBS: </b> 1 Av at 24 St. \r\n", | |
"<br>\r\n", | |
"<br><b>Reminder</b>: <a href=http://web.mta.info/nyct/bus/schedule/manh/m015scur.pdf target=_blank><font color=#0000FF><b><u>M15 SBS </u></b></font></a> buses do not operate overnight.\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" \r\n", | |
" <span class=\"TitlePlannedWork\" >Planned Detour</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(94109);>\r\n", | |
"<b>M15 - Northbound bus stop on 1 Av between E 86 St and E 87 St temporarily relocated\r\n", | |
"</a><br/><br/><div id= 94109 class=\"plannedWorkDetail\" ></b>Until further notice\r\n", | |
"<br>\r\n", | |
"<br>Due to construction, please use the temporary stop <b>on 1 Av</b> at E 87 St.\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/>\r\n", | |
" <br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': 'Q1 - Q113',\n", | |
" 'status': 'PLANNED WORK',\n", | |
" 'text': 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Q7 and Q41- Eastbound/Northbound stop on Liberty Av at 95 St '\n", | |
" 'temporarily closed\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to construction, please use the temporary stop on Cross Bay '\n", | |
" 'Blvd at Liberty Av.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Q66 Eastbound and Q100 LTD northbound buses detoured from 42 Rd '\n", | |
" 'between 27 St and Hunter St - No stops missed\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Due to road obstruction, buses may experience delays.\\r\\n'\n", | |
" '\\n'\n", | |
" 'Show Reroute Details\\n'\n", | |
" 'Eastbound Q66\\r\\n'\n", | |
" 'Via 28 St\\r\\n'\n", | |
" 'Left on 42 Rd\\r\\n'\n", | |
" 'Right on Jackson Av\\r\\n'\n", | |
" 'Right on 43 Av\\r\\n'\n", | |
" 'Right on 23 Av then regular route\\r\\n'\n", | |
" '\\n'\n", | |
" 'Northbound Q100 LTD\\r\\n'\n", | |
" 'Via Jackson Av\\r\\n'\n", | |
" 'Right on 43 Av\\r\\n'\n", | |
" 'Right on 23 Av then regular route\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Planned Detour\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'B20 and Q24 - Northbound/Westbound buses detoured from Van '\n", | |
" 'Sinderen Av at BroadwayJunction due to construction - No stops '\n", | |
" 'missed\\r\\n'\n", | |
" 'Until further notice\\r\\n'\n", | |
" '\\n'\n", | |
" 'Show Reroute Details\\n'\n", | |
" 'Via Jamaica Av\\r\\n'\n", | |
" 'Into Fulton St\\r\\n'\n", | |
" 'Right on Eastern Pkwy\\r\\n'\n", | |
" 'Left on Broadway then regular route\\n'\n", | |
" '\\n'\n", | |
" 'Reminder: B20 buses do not operate overnight.\\r\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n',\n", | |
" 'time': '10:13AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitlePlannedWork\" >Planned Detour</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(92504);>\r\n", | |
"<b>Q7 and Q41- Eastbound/Northbound stop on Liberty Av at 95 St temporarily closed\r\n", | |
"</a><br/><br/><div id= 92504 class=\"plannedWorkDetail\" ></b>Until further notice\r\n", | |
"<br>\r\n", | |
"<br>Due to construction, please use the temporary stop <b>on Cross Bay Blvd</b> at Liberty Av.\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" \r\n", | |
" <span class=\"TitlePlannedWork\" >Planned Detour</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(94715);>\r\n", | |
"<b>Q66 Eastbound and Q100 </b>LTD<b> northbound </b> <b>buses detoured from 42 Rd between 27 St and Hunter St - No stops missed\r\n", | |
"</a><br/><br/><div id= 94715 class=\"plannedWorkDetail\" ></b>Until further notice\r\n", | |
"<br>\r\n", | |
"<br>Due to road obstruction, buses may experience delays.\r\n", | |
"<br>\r\n", | |
"<br><b><i><a style=\"cursor:pointer; text-decoration:underline;\" onclick=ShowHide(947150);>Show Reroute Details</a></i></b>\r\n", | |
"<br><div id=\"947150\"; style=\"display:none;\"><b>Eastbound Q66\r\n", | |
"<br></b>Via 28 St\r\n", | |
"<br>Left on 42 Rd\r\n", | |
"<br>Right on Jackson Av\r\n", | |
"<br>Right on 43 Av\r\n", | |
"<br>Right on 23 Av then regular route\r\n", | |
"<br>\r\n", | |
"<br><b>Northbound Q100 LTD\r\n", | |
"<br></b>Via Jackson Av\r\n", | |
"<br>Right on 43 Av\r\n", | |
"<br>Right on 23 Av then regular route</div>\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" \r\n", | |
" <span class=\"TitlePlannedWork\" >Planned Detour</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(88492);>\r\n", | |
"<b>B20 and Q24 - Northbound/Westbound buses detoured from Van Sinderen Av at Broadway<br clear= left>Junction due to construction - No stops missed\r\n", | |
"</a><br/><br/><div id= 88492 class=\"plannedWorkDetail\" ></b>Until further notice\r\n", | |
"<br>\r\n", | |
"<br><b><i><a style=\"cursor:pointer; text-decoration:underline;\" onclick=ShowHide(884920);>Show Reroute Details</a></i></b>\r\n", | |
"<br><div id=\"884920\"; style=\"display:none;\">Via Jamaica Av\r\n", | |
"<br>Into Fulton St\r\n", | |
"<br>Right on Eastern Pkwy\r\n", | |
"<br>Left on Broadway then regular route</div>\r\n", | |
"<br>\r\n", | |
"<br><b>Reminder</b>: <a href=http://web.mta.info/nyct/bus/schedule/bkln/b020cur.pdf target=_blank><font color=#0000FF><b><u>B20</u></b></font></a> buses do not operate overnight.\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/>\r\n", | |
" <br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'QM1 - QM25',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': 'S40 - S98',\n", | |
" 'status': 'SERVICE CHANGE',\n", | |
" 'text': 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/201510:07AM\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'S53 buses are detoured, due to milling on Broadway between '\n", | |
" 'Castleton Avand Delafield Av. \\n'\n", | |
" 'Detour is as follows:\\n'\n", | |
" '4 Av bound: Via Castleton Av, right on Bement Av, right on Forest '\n", | |
" 'Av, left on Broadway and regular route.\\n'\n", | |
" 'Port Richmond bound: Via Broadway, right on Forest Av, left on '\n", | |
" 'Bement Av, left on Castleton Av and regular route.\\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n',\n", | |
" 'time': '10:07AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>NAIVE regex extraction attempt</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/26/201510:07AM',\n", | |
" 'info': '\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'S53 buses are detoured, due to milling on Broadway between '\n", | |
" 'Castleton Avand Delafield Av. \\n'\n", | |
" 'Detour is as follows:\\n'\n", | |
" '4 Av bound: Via Castleton Av, right on Bement Av, right on Forest '\n", | |
" 'Av, left on Broadway and regular route.\\n'\n", | |
" 'Port Richmond bound: Via Broadway, right on Forest Av, left on '\n", | |
" 'Bement Av, left on Castleton Av and regular route.\\n'\n", | |
" 'Allow additional travel time.\\n'\n", | |
" '\\n',\n", | |
" 'notice': 'Service Change\\n\\r\\n '}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitleServiceChange\" >Service Change</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 10:07AM\r\n", | |
" </span>\r\n", | |
" <br/>\r\n", | |
" <br/>\r\n", | |
" <P><STRONG>S53</STRONG> buses are detoured, due to milling on Broadway between Castleton Av and Delafield Av. </P>\r\n", | |
"<P>Detour is as follows:</P>\r\n", | |
"<P><STRONG>4 Av bound</STRONG>: Via Castleton Av, right on Bement Av, right on Forest Av, left on Broadway and regular route.</P>\r\n", | |
"<P><STRONG>Port Richmond bound:</STRONG> Via Broadway, right on Forest Av, left on Bement Av, left on Castleton Av and regular route.</P>\r\n", | |
"<P>Allow additional travel time.</P>\r\n", | |
" <br/>\r\n", | |
" <br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'x1 - x68',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<H1>BT Status Information</H1>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Bronx-Whitestone',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Cross Bay',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Henry Hudson',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': 'Hugh L. Carey',\n", | |
" 'status': 'SERVICE CHANGE',\n", | |
" 'text': 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/2015 5:40AM\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' HOV Lane Open 6 AM to 10 AM. Two-Way Operations in effect. Three '\n", | |
" '(3) lanes Manhattan-bound. One (1) lane Brooklyn-bound.\\r\\n'\n", | |
" ' ',\n", | |
" 'time': ' 5:40AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>NAIVE regex extraction attempt</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/26/2015 5:40AM',\n", | |
" 'info': '\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' HOV Lane Open 6 AM to 10 AM. Two-Way Operations in effect. Three '\n", | |
" '(3) lanes Manhattan-bound. One (1) lane Brooklyn-bound.\\r\\n'\n", | |
" ' ',\n", | |
" 'notice': 'Service Change\\n\\r\\n '}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitleServiceChange\" >Service Change</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 5:40AM\r\n", | |
" </span><br/><br/>\r\n", | |
" HOV Lane Open 6 AM to 10 AM. Two-Way Operations in effect. Three (3) lanes Manhattan-bound. One (1) lane Brooklyn-bound.\r\n", | |
" <br/><br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Marine Parkway',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': 'Queens Midtown',\n", | |
" 'status': 'DELAYS',\n", | |
" 'text': 'Delays\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/2015 8:35AM\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' OFF PROPERTY DELAYS AFFECTING THE QUEENS MIDTOWN TUNNEL - '\n", | |
" 'WESTBOUND (MANHATTAN BOUND). EXPECT DELAYS.\\r\\n'\n", | |
" ' \\n'\n", | |
" 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/2015 5:41AM\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' HOV Lane Open 6 AM to 10 AM. Two-Way Operations in effect. Three '\n", | |
" '(3) lanes Manhattan-bound. One (1) lane Queens-bound.\\r\\n'\n", | |
" ' ',\n", | |
" 'time': ' 8:35AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>NAIVE regex extraction attempt</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/26/2015 5:41AM',\n", | |
" 'info': '\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' HOV Lane Open 6 AM to 10 AM. Two-Way Operations in effect. Three '\n", | |
" '(3) lanes Manhattan-bound. One (1) lane Queens-bound.\\r\\n'\n", | |
" ' ',\n", | |
" 'notice': 'Delays\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/2015 8:35AM\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' OFF PROPERTY DELAYS AFFECTING THE QUEENS MIDTOWN TUNNEL - '\n", | |
" 'WESTBOUND (MANHATTAN BOUND). EXPECT DELAYS.\\r\\n'\n", | |
" ' \\n'\n", | |
" 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' '}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitleDelay\">Delays</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 8:35AM\r\n", | |
" </span><br/><br/>\r\n", | |
" OFF PROPERTY DELAYS AFFECTING THE QUEENS MIDTOWN TUNNEL - WESTBOUND (MANHATTAN BOUND). EXPECT DELAYS.\r\n", | |
" <br/><br/>\r\n", | |
" \r\n", | |
" <span class=\"TitleServiceChange\" >Service Change</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 5:41AM\r\n", | |
" </span><br/><br/>\r\n", | |
" HOV Lane Open 6 AM to 10 AM. Two-Way Operations in effect. Three (3) lanes Manhattan-bound. One (1) lane Queens-bound.\r\n", | |
" <br/><br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/17/2015',\n", | |
" 'name': 'Robert F. Kennedy',\n", | |
" 'status': 'SERVICE CHANGE',\n", | |
" 'text': 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/17/2015 6:28AM\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" \" One of two lanes on the Robert F. Kennedy Bridge's ramp leading \"\n", | |
" 'from the bridge to the Bruckner Expressway will be closed for '\n", | |
" 'repair work March 16 through April 13. One lane on the ramp will '\n", | |
" 'remain open at all times. \\n'\n", | |
" 'Motorists may experience delays and should allot extra travel '\n", | |
" 'time.\\n',\n", | |
" 'time': ' 6:28AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>NAIVE regex extraction attempt</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/17/2015 6:28AM',\n", | |
" 'info': '\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" \" One of two lanes on the Robert F. Kennedy Bridge's ramp leading \"\n", | |
" 'from the bridge to the Bruckner Expressway will be closed for '\n", | |
" 'repair work March 16 through April 13. One lane on the ramp will '\n", | |
" 'remain open at all times. \\n'\n", | |
" 'Motorists may experience delays and should allot extra travel '\n", | |
" 'time.\\n',\n", | |
" 'notice': 'Service Change\\n\\r\\n '}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitleServiceChange\" >Service Change</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/17/2015 6:28AM\r\n", | |
" </span><br/><br/>\r\n", | |
" One of two lanes on the Robert F. Kennedy Bridge's ramp leading <SPAN style=\"FONT-FAMILY: 'Arial','sans-serif'\">from the bridge to the Bruckner Expressway will be closed for repair work <B>March 16 through April 13. </B>One lane on the ramp will remain open at all times. </SPAN>\r\n", | |
"<P style=\"MARGIN: 0in 0in 0pt 0.75in; LINE-HEIGHT: 115%\"><SPAN style=\"FONT-FAMILY: 'Arial','sans-serif'\"><FONT size=3> </FONT></SPAN></P><SPAN style=\"FONT-SIZE: 12pt; FONT-FAMILY: 'Arial','sans-serif';\">Motorists may experience delays and should allot extra travel time.</SPAN>\r\n", | |
" <br/><br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Throgs Neck',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '03/26/2015',\n", | |
" 'name': 'Verrazano-Narrows',\n", | |
" 'status': 'SERVICE CHANGE',\n", | |
" 'text': 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/2015 8:54AM\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' Due to a fog condition motorists are asked to drive at reduced '\n", | |
" 'speeds.\\r\\n'\n", | |
" ' \\n'\n", | |
" 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/16/2015 6:24AM\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" 'VNB - UPPER LEVEL TRAFFIC PATTERN \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Monday thru Thursday: 5 AM to 9:30 AM (3) Lanes Brooklyn-bound. '\n", | |
" '(2) Lanes Staten Island-bound.\\n'\n", | |
" '2:30 PM to 10PM (3) Lanes Staten Island-bound. (2) Lanes '\n", | |
" 'Brooklyn-bound.\\n'\n", | |
" '\\n'\n", | |
" 'Friday: 5 AM to 9:30 AM (3) Lanes Bklyn-bnd. (2) Lanes Staten '\n", | |
" 'Island-bound.\\n'\n", | |
" '1:30 PM to 11:59 PM (3) Lanes Staten Island-bound. (2) Lanes '\n", | |
" 'Brooklyn-bound.\\n'\n", | |
" '\\n'\n", | |
" 'Saturday and Sunday:\\n'\n", | |
" 'Three (3) Lanes Staten Island-bound. Two (2) Lanes '\n", | |
" 'Brooklyn-bound.\\n'\n", | |
" '\\n',\n", | |
" 'time': ' 8:54AM'}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>NAIVE regex extraction attempt</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'datetime': '03/16/2015 6:24AM',\n", | |
" 'info': '\\r\\n'\n", | |
" ' \\n'\n", | |
" '\\n'\n", | |
" 'VNB - UPPER LEVEL TRAFFIC PATTERN \\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" '\\n'\n", | |
" 'Monday thru Thursday: 5 AM to 9:30 AM (3) Lanes Brooklyn-bound. '\n", | |
" '(2) Lanes Staten Island-bound.\\n'\n", | |
" '2:30 PM to 10PM (3) Lanes Staten Island-bound. (2) Lanes '\n", | |
" 'Brooklyn-bound.\\n'\n", | |
" '\\n'\n", | |
" 'Friday: 5 AM to 9:30 AM (3) Lanes Bklyn-bnd. (2) Lanes Staten '\n", | |
" 'Island-bound.\\n'\n", | |
" '1:30 PM to 11:59 PM (3) Lanes Staten Island-bound. (2) Lanes '\n", | |
" 'Brooklyn-bound.\\n'\n", | |
" '\\n'\n", | |
" 'Saturday and Sunday:\\n'\n", | |
" 'Three (3) Lanes Staten Island-bound. Two (2) Lanes '\n", | |
" 'Brooklyn-bound.\\n'\n", | |
" '\\n',\n", | |
" 'notice': 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' Posted:03/26/2015 8:54AM\\r\\n'\n", | |
" ' \\r\\n'\n", | |
" ' Due to a fog condition motorists are asked to drive at reduced '\n", | |
" 'speeds.\\r\\n'\n", | |
" ' \\n'\n", | |
" 'Service Change\\n'\n", | |
" '\\r\\n'\n", | |
" ' '}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>HTML Render of Payload text</h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<span class=\"TitleServiceChange\" >Service Change</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/26/2015 8:54AM\r\n", | |
" </span><br/><br/>\r\n", | |
" Due to a fog condition motorists are asked to drive at reduced speeds.\r\n", | |
" <br/><br/>\r\n", | |
" \r\n", | |
" <span class=\"TitleServiceChange\" >Service Change</span>\r\n", | |
" <span class=\"DateStyle\">\r\n", | |
" Posted: 03/16/2015 6:24AM\r\n", | |
" </span><br/><br/>\r\n", | |
" <FONT size=3 face=\"Times New Roman\"></FONT>\r\n", | |
"<P style=\"MARGIN: 0in 0in 0pt\"><B><FONT size=3><FONT face=\"Times New Roman\">VNB - UPPER LEVEL TRAFFIC PATTERN </FONT></FONT></B></P><FONT size=3 face=\"Times New Roman\"></FONT>\r\n", | |
"<P style=\"MARGIN: 0in 0in 0pt\">\r\n", | |
"<P style=\"MARGIN: 0in 0in 0pt\"><SPAN style=\"FONT-FAMILY: 'Calibri',sans-serif; COLOR: black\"><FONT size=3></FONT></SPAN></P>\r\n", | |
"<P> </P>\r\n", | |
"<P><SPAN style=\"FONT-FAMILY: 'Calibri',sans-serif; COLOR: black\"><FONT size=3>Monday thru Thursday: 5 AM to 9:30 AM (3) Lanes Brooklyn-bound. (2) Lanes Staten Island-bound.</FONT></P>\r\n", | |
"<P style=\"MARGIN: 0in 0in 8pt\"><FONT size=3>2:30 PM to 10PM (3) Lanes Staten Island-bound. (2) Lanes Brooklyn-bound.</FONT></P>\r\n", | |
"<P style=\"MARGIN: 0in 0in 8pt\"><FONT size=3> </FONT></P>\r\n", | |
"<P style=\"MARGIN: 0in 0in 8pt\"><FONT size=3><SPAN> </SPAN></FONT><FONT size=3>Friday: 5 AM to 9:30 AM (3) Lanes Bklyn-bnd. (2) Lanes Staten Island-bound.</FONT></P>\r\n", | |
"<P style=\"MARGIN: 0in 0in 8pt\"><FONT size=3>1:30 PM to 11:59 PM (3) Lanes Staten Island-bound. (2) Lanes Brooklyn-bound.</FONT></P>\r\n", | |
"<P style=\"MARGIN: 0in 0in 8pt\"><FONT size=3> </FONT></P>\r\n", | |
"<P style=\"MARGIN: 0in 0in 8pt\"><FONT size=3><SPAN> </SPAN></FONT><FONT size=3>Saturday and Sunday:</FONT></P>\r\n", | |
"<P style=\"MARGIN: 0in 0in 8pt\"><FONT size=3> </FONT><FONT size=3>Three (3) Lanes Staten Island-bound. Two (2) Lanes Brooklyn-bound.</FONT></P>\r\n", | |
"<P style=\"MARGIN: 0in 0in 0pt\"></SPAN><FONT size=3 face=\"Times New Roman\"> </FONT></P>\r\n", | |
" <br/><br/>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<H1>LIRR Status Information</H1>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Babylon',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'City Terminal Zone',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Far Rockaway',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Hempstead',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Long Beach',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Montauk',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Oyster Bay',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Port Jefferson',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Port Washington',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Ronkonkoma',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'West Hempstead',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<H1>METRONORTH Status Information</H1>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Hudson',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Harlem',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Wassaic',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'New Haven',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'New Canaan',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Danbury',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Waterbury',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Pascack Valley',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h2>JSON Friendly dic/hash<h2>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'date': '',\n", | |
" 'name': 'Port Jervis',\n", | |
" 'status': 'GOOD SERVICE',\n", | |
" 'text': '',\n", | |
" 'time': ''}\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<hr />" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Each service has multiple line nodes which has date, time, name, status and payload text\n", | |
"# line \n", | |
"# date\n", | |
"# time\n", | |
"# name\n", | |
"# status\n", | |
"# text\n", | |
"attributes = 'date time name status text'.split()\n", | |
"\n", | |
"# This regex will parse delay text messages\n", | |
"# \n", | |
"rex_withposted = '(?P<notice>.+)Posted:(?P<datetime>[\\d/:\\s]+(AM|PM))(?P<info>.+)$'\n", | |
"rex_withposted = re.compile(rex_withposted, re.IGNORECASE|re.MULTILINE|re.DOTALL)\n", | |
"\n", | |
"for servicename in services:\n", | |
" display(HTML( '<H1>{} Status Information</H1>'.format(servicename.upper())))\n", | |
"\n", | |
" for line in getattr(soup.service, servicename).find_all('line'):\n", | |
" name = line.find('name').get_text()\n", | |
" status = line.find('status').get_text()\n", | |
" date = line.find('date').get_text()\n", | |
" time = line.find('time').get_text()\n", | |
" html = line.find('text').get_text().strip()\n", | |
" dic = { 'name' : name, 'status' : status, \n", | |
" 'date' : date, 'time' : time,\n", | |
" 'text' : Soup(html).get_text().encode('ascii','ignore').decode(\"utf-8\")\n", | |
" }\n", | |
" \n", | |
" # remove extra whitespace from text\n", | |
" dic['text'] = re.sub(' {3,}', ' ', dic['text'])\n", | |
"# print('\\n----\\n{}\\n----\\n'.format(dic['text']))\n", | |
" display(HTML('<h2>JSON Friendly dic/hash<h2>'))\n", | |
" pprint.pprint(dic)\n", | |
" \n", | |
" if len(html):\n", | |
" \n", | |
" match = rex_withposted.match(dic['text'])\n", | |
" if match:\n", | |
" display(HTML('<h2>NAIVE regex extraction attempt</h2>'))\n", | |
" pprint.pprint(match.groupdict())\n", | |
" display(HTML('<h2>HTML Render of Payload text</h2>'))\n", | |
" display(HTML(html))\n", | |
" display(HTML('<hr />'))\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Collect every <line> node found under soup.service.subway.\n", | |
"lines = soup.service.subway.find_all('line')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 76, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"l = lines[0]\n", | |
"#name status text date time\n", | |
"name = l.find('name').get_text()\n", | |
"text = l.find('text').get_text()\n", | |
"status, dt, tm = l.status.get_text(), l.date.get_text(), l.time.get_text()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"\r\n", | |
" <span class=\"TitlePlannedWork\" >Planned Work</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(91849);>\r\n", | |
"<b>[FT]<br clear=left[3] No trains running\r\n", | |
"</a><br/><br/><div id= 91849 class=\"plannedWorkDetail\" >[2] [4] trains and [SB] free shuttle buses provide alternate service\r\n", | |
"<br></b>Late Nights, </font>10 PM to 5 AM, Mon to Fri, Mar 23 - 27\r\n", | |
"<br>\r\n", | |
"<br>[SB] buses make all [3] stops between <b>148 St</b> and <b>96 St</b>.\r\n", | |
"<br>[2] trains make all [3] stops between <b>96 St</b> and <b>Franklin Av</b>.\r\n", | |
"<br>[4] trains make all [3] stops between <b>Nevins St</b> and <b>New Lots Av</b>.\r\n", | |
"<br>\r\n", | |
"<br>• Transfer between [SB] buses and [2] trains at <b>96 St</b>.\r\n", | |
"<br>• Transfer between [2] [4] trains at <b>Franklin Av</b> <i>or </i><b>Nevins St</b>.\r\n", | |
"<br>\r\n", | |
"<br></font><table class=plannedworkTableStyle border=1 cellspacing=1 cellpadding=3 rules=rows frame=hsides><tr bgcolor=#FAF8CC><td><b>Station </b></td><td><b>Shuttle Bus Stop</b><td><b>Bus</b><tr><td>148 St<td>Adam Clayton Powell Jr Blvd at 149 St<td>[SB] <tr><td>145 St<td>Lenox Av at 145 St<td>[SB], <b>M7</b>, <b>M102</b><tr><td>135 St<td>Lenox Av at 135 St<td>[SB], <b>M7</b>, <b>M102</b><tr><td>125 St<td>Lenox Av at 125 St<td>[SB], <b>M7</b>, <b>M102</b><tr><td>116 St<td>Lenox Av at 116 St<td>[SB], <b>M7</b>, <b>M102</b><tr><td>110 St<td>Lenox Av at 111 St<td>[SB]<tr><td>96 St [ad] [1] [2] <td>Broadway at 95 St<td>[SB]</table>\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/><br/>\r\n", | |
" \r\n", | |
" <span class=\"TitlePlannedWork\" >Planned Work</span>\r\n", | |
" <br/>\r\n", | |
" <a class=\"plannedWorkDetailLink\" onclick=ShowHide(91851);>\r\n", | |
"<b>[FT]<br clear=left> [2] No trains between 3 Av-149 St and 96 St\r\n", | |
"</a><br/><br/><div id= 91851 class=\"plannedWorkDetail\" >[SB] Free shuttle buses provide alternate service\r\n", | |
"<br></b>Late Nights, 10 PM to 5 AM, Mon to Fri, Mar 23 - 27\r\n", | |
"<br>\r\n", | |
"<br><b>[2] service operates in two sections:\r\n", | |
"<br></b> 1. Between <b>241 St</b> and <b>3 Av-149 St</b>, trains <i>from</i> 241 St skip<b> Jackson Av\r\n", | |
"<br></b> 2. Between <b>96 St</b> and <b>Flatbush Av\r\n", | |
"<br></b> \r\n", | |
"<br><table class=plannedworkTableStyle width=80%><tr><td></font><font size=3><b>Travel Alternatives</b></font><td align=right>[TP]</table> \r\n", | |
"<br>[SB]</font> <b>buses operate along two routes:\r\n", | |
"<br>\r\n", | |
"<br></b><i>[SB] Bronx-Manhattan Shuttle Bus</i> between <b>3 Av-149 St [2]</b> and <b>96 St [1] [2]</b>, stopping at \r\n", | |
"<br>149 St-Grand Concourse <b>[4]</b>, 145, 135, 125, 116, and 110 Sts.\r\n", | |
"<br><i>[SB] Bronx Only Shuttle Bus</i> <i>from</i> <b>Jackson Av [2]</b> and <b>149 St-Grand Concourse [4]</b>, \r\n", | |
"<br>stopping at 3 Av-149 St [2].\r\n", | |
"<br>\r\n", | |
"<br>• Transfer between <b>[2]</b> trains and [SB] buses at 3 Av-149 St <i>and/or</i> 96 St.\r\n", | |
"<br>• Transfer between <b>[4] </b>trains and [SB] buses at 149 St-Grand Concourse.\r\n", | |
"<br>\r\n", | |
"<br>For service <i>to</i> <b>Jackson Av</b>, take the [2] to 3 Av-149 St <i>where it will become</i> a Wakefield-bound [2].\r\n", | |
"<br>For service <i>from</i> this station, take a free [SB] bus.\r\n", | |
"<br>\r\n", | |
"<br><table class=plannedworkTableStyle border=1 cellspacing=1 cellpadding=3 rules=rows frame=hsides><tr><td colspan=3 align=center><i>Bronx-Manhattan Shuttle Bus</i><tr bgcolor=#FAF8CC><td><b>Station </b></td><td><b>[SB] Bus Stop</b><td><b>Bus</b><tr><td>3 Av-149 St [ad] [2]<td>149 St at 3 Av<td align=center>—<tr><td>149 St-Grand Concourse [4] <td>149 St at Grand Concourse<td><b>Bx19<tr><td>145 St<td>Lenox Av at 145 St<td align=center>—</font><tr><td>135 St<td>Lenox Av at 135 St<td><b>M7<tr><td>125 St<td>Lenox Av at 125 St<td><b>M7<tr><td>116 St<td>Lenox Av at 116 St<td><b>M7<tr><td>110 St<td>Lenox Av at 111 St<td align=center>—</font><tr><td>96 St [ad] [1] [2]<td>Broadway at 95 St<td align=center>—<tr><td colspan=3 align=center><i>Bronx Shuttle Bus</i><tr bgcolor=#FAF8CC><td><b>Station </b></td><td><b>[SB] Bus Stop</b><td><b>Bus</b><tr><td>Jackson Av<td>Westchester Av at Jackson Av (pick up only)<td><b>Bx4</b><tr><td>3 Av-149 St [ad] [2]<td>Melrose Av at 150 St (to 149 St-Grand Concourse only)<td align=center>—</font><tr><td>149 St at Grand Concourse [4] <td align=center>—<td align=center>—</table>\r\n", | |
"<br><b>Notes:</b> No [5] trains between E 180 St and Bowling Green.\r\n", | |
"<br> No [3] trains running during this time.\r\n", | |
"<br>\r\n", | |
"<br><table class=plannedworkTableStyle border=1 cellspacing=1 cellpadding=5 rules=none frame=box><td> [ad] <td><font size=1>This service change affects one or more ADA accessible stations. Please call 511 for help with planning<br>your trip. If you are deaf or hard of hearing, use your preferred relay service provider or the free 711 relay. </font></table>\r\n", | |
"<br><b>\r\n", | |
"<br></div></b><br/>\r\n", | |
" <br/><br/>\r\n", | |
" " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Render the HTML held in `text` inline; the markup is passed through unescaped.\n", | |
"display(HTML(text))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment