Skip to content

Instantly share code, notes, and snippets.

Created August 10, 2016 19:34
Show Gist options
  • Save anonymous/a8f0e747941b7d217fdf691d041f4c86 to your computer and use it in GitHub Desktop.
Save anonymous/a8f0e747941b7d217fdf691d041f4c86 to your computer and use it in GitHub Desktop.
import urllib.request
from bs4 import BeautifulSoup
def get_hmtl(url):
    """Download *url* and return the raw response body.

    Args:
        url: Address to open; anything ``urllib.request.urlopen`` accepts.

    Returns:
        The undecoded body exactly as produced by ``response.read()``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the
            resource cannot be opened.
    """
    # The context manager closes the underlying connection even when
    # read() raises; previously the response object was never closed.
    with urllib.request.urlopen(url) as response:
        return response.read()
def parse(html):
    """Extract train timings from a schedule page and pretty-print them.

    Locates the ``table.schedule_table_classic`` element, ignores every
    body row whose class list contains ``indication_gone``, and reads the
    first three ``td div.hidden-value`` cells of each remaining row as the
    departure, arrival and travel values.

    Args:
        html: Page markup to hand to BeautifulSoup.

    Returns:
        The list of per-train dicts that was printed. Each dict has the
        keys ``departure_time``, ``arrival_time`` and ``travel_time``.

    Raises:
        ValueError: If the schedule table or its ``<tbody>`` is missing.
        IndexError: If a kept row has fewer than three hidden-value cells.
    """
    import pprint

    # NOTE(review): the hidden values are presumably expressed in seconds
    # (they are divided by 3600 / 60 below) -- confirm against the page.
    secs_in_hour = 3600
    secs_in_minute = 60

    soup = BeautifulSoup(html, "html.parser")
    table = soup.find('table', class_='schedule_table_classic')
    body = table.find('tbody') if table is not None else None
    if body is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "schedule table 'table.schedule_table_classic' with a <tbody> "
            "was not found in the supplied HTML"
        )

    trains = []
    for row in body.find_all('tr'):
        # A <tr> without a class attribute has no 'class' key at all;
        # .get() treats it as an empty class list instead of raising
        # KeyError as attrs['class'] did.
        if 'indication_gone' in row.get('class', []):
            continue

        cols = row.select('td div.hidden-value')
        departure_time = float(cols[0].text)
        arrival_time = float(cols[1].text)
        travel_time = float(cols[2].text)
        trains.append({
            # NOTE(review): departure_time is stored as str while
            # arrival_time is an int; kept as-is to preserve the output,
            # but this looks unintentional -- confirm the desired type.
            'departure_time': str(round(departure_time / secs_in_hour)),
            'arrival_time': round(arrival_time / secs_in_hour),
            'travel_time': round(travel_time / secs_in_minute),
        })

    pprint.pprint(trains)
    return trains
def main():
    """Fetch the hard-coded schedule page and pass its markup to parse()."""
    schedule_url = 'http://www.tutu.ru/rasp.php?st1=48707&st2=20000&date=10.08.2016'
    page_markup = get_hmtl(schedule_url)
    parse(page_markup)
# Run the scraper only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment