Bus parser
# -*- encoding: utf-8 -*- | |
import requests | |
import bs4 | |
def ebus(rid, timeout=10):
    """Fetch the stop list of one route from the e-bus.tpc.gov.tw PDA page.

    Every table row that has exactly four cells is read as
    ``<go place> <go time> <back place> <back time>``.

    Args:
        rid: Route id, substituted into the ``rid`` query parameter.
        timeout: Seconds handed to ``requests.get``. Pass ``None`` to wait
            indefinitely (the behaviour before this parameter existed).

    Returns:
        A ``(go_route, back_route)`` tuple. Each element is a list of
        ``{'place': ..., 'time': ...}`` dicts in page order. Both lists are
        empty when the response has no body.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (connection failure, timeout, ...).
    """
    url = 'http://e-bus.tpc.gov.tw/pda/online.php?rid=%s' % rid
    go_route = []
    back_route = []

    # Without a timeout an unresponsive server blocks the caller forever.
    response = requests.get(url, timeout=timeout)
    if not response.content:
        return go_route, back_route

    soup = bs4.BeautifulSoup(response.content, 'lxml')
    for tr in soup.find_all('tr'):
        tds = [td.get_text() for td in tr.find_all('td')]
        # Rows with any other cell count are headers or layout rows.
        if len(tds) != 4:
            continue
        go_place, go_time, back_place, back_time = tds
        # One direction may have fewer stops, leaving its pair of cells empty.
        if go_place and go_time:
            go_route.append({'place': go_place, 'time': go_time})
        if back_place and back_time:
            back_route.append({'place': back_place, 'time': back_time})
    return go_route, back_route
def loveBus(name, timeout=10):
    """Fetch the stop list of one route from the pda.5284.com.tw page.

    The page is expected to hold one ``<td valign="top">`` cell per
    direction: the first is treated as the outbound ("go") list and the
    second as the return ("back") list. Inside each cell, every row that
    yields exactly two non-empty strings is read as ``<place> <time>``.

    Args:
        name: Route name, substituted into the ``routename`` query parameter.
        timeout: Seconds handed to ``requests.get``. Pass ``None`` to wait
            indefinitely (the behaviour before this parameter existed).

    Returns:
        A ``(go_route, back_route)`` tuple. Each element is a list of
        ``{'place': ..., 'time': ...}`` dicts in page order. Both lists are
        empty when the response has no body.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (connection failure, timeout, ...).
    """
    url = 'http://pda.5284.com.tw/MQS/businfo2.jsp?routename=%s' % name
    go_route = []
    back_route = []

    # Without a timeout an unresponsive server blocks the caller forever.
    response = requests.get(url, timeout=timeout)
    if not response.content:
        return go_route, back_route

    soup = bs4.BeautifulSoup(response.content, 'lxml')
    direction_cells = soup.find_all('td', valign='top')
    # zip pairs the first cell with go_route and the second with back_route,
    # then stops, so any further valign="top" cells are ignored.
    for cell, route in zip(direction_cells, (go_route, back_route)):
        for tr in cell.find_all('tr'):
            parts = list(tr.stripped_strings)
            if len(parts) == 2:
                route.append({'place': parts[0], 'time': parts[1]})
    return go_route, back_route
if __name__ == '__main__':
    # Manual smoke run against the live 5284 service for route '27'.
    # The result is discarded; call ebus('5127') instead to exercise
    # the e-bus parser.
    loveBus(name='27')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment