Skip to content

Instantly share code, notes, and snippets.

@lynnporu
Created September 4, 2019 16:56
Show Gist options
  • Save lynnporu/8e565af90b773794a6cee2adfe8f73a4 to your computer and use it in GitHub Desktop.
Save lynnporu/8e565af90b773794a6cee2adfe8f73a4 to your computer and use it in GitHub Desktop.
import re
import requests
import json
from bs4 import BeautifulSoup
class Station(object):
    """Railway station identified by a numeric ID (``sid``) and/or a name.

    Every assignment to ``sid`` or ``name`` (including those made by the
    constructor) registers the instance in ``Station.CREATED`` under the
    assigned value.

    NOTE: an interning ``__new__`` (returning the already-created station for
    a known sid/name) was prototyped for this class but left disabled, so two
    ``Station(sid=1)`` calls yield two distinct objects.
    """

    # Destinations cache: key is a station ID, value is the list of Station
    # objects produced by loadDestinations() for that ID.
    CACHE = dict()
    # Registry of created stations, where key is sid or name.
    CREATED = dict()

    # Literal prefix ("з.п.") removed from the start of station names.
    _NAME_PREFIX = "\u0437.\u043f."
    _BASE_URL = "https://swrailway.gov.ua/timetable/eltrain3-7/"

    def __init__(self, **kwargs):
        """Creates a station.

        Expected keys: sid, name, direction. Any other keyword is stored as
        an attribute as well. ``withID`` records whether ``sid`` was given.

        ID of directions:
        1 - Donetska
        2 - Lvivska
        3 - Odeska
        4 - Pivdenna
        5 - Pivdenno-Zakhidna
        6 - Prydniprovska
        11 - Donetska
        12 - Krasnolymanska
        13 - Luhanska
        21 - Lvivska
        22 - Ternopilska
        23 - Rivnenska
        24 - Ivano-Frankivska
        25 - Uzhhorodska
        31 - Odeska
        32 - Shevchenkivska
        33 - Znam'yanska
        34 - Khersonska
        42 - Kharkivska
        43 - Sumska
        44 - Poltavska
        45 - Kup'yanska
        51 - Kyyivska
        52 - Koziatynska
        53 - Zhmerynska
        54 - Korostenska
        55 - Konotopska
        61 - Dniprovska
        62 - Kryvorizka
        63 - Zaporizka
        64 - Krymska
        """
        if "name" in kwargs:
            kwargs["name"] = self._normalizeName(kwargs["name"])
        # Assign through __setattr__ (not __dict__.update) so that both sid
        # and name end up in the CREATED registry.
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.withID = "sid" in kwargs

    @staticmethod
    def _normalizeName(name):
        """Return ``name`` cleaned up and without the leading "з.п." prefix."""
        # NOTE(review): the replaced character is reproduced as found in the
        # source; it may originally have been a non-breaking space - confirm.
        name = name.replace(" ", "").strip()
        # Remove the prefix as a whole string. str.lstrip() would treat it as
        # a set of characters and also eat the start of names like "пост".
        if name.startswith(Station._NAME_PREFIX):
            name = name[len(Station._NAME_PREFIX):].lstrip()
        return name

    def __setattr__(self, name, value):
        """Store the attribute, keeping CREATED in sync for sid and name."""
        if name in ("sid", "name"):
            if name in self.__dict__:
                previous = self.__dict__[name]
                # Drop the stale key only if it still points at this object,
                # so another station registered under the same key survives.
                if Station.CREATED.get(previous) is self:
                    del Station.CREATED[previous]
            Station.CREATED[value] = self
        self.__dict__[name] = value

    @property
    def destinations(self):
        """List of destination stations for this sid, loaded once per sid."""
        if self.sid in Station.CACHE:
            return Station.CACHE[self.sid]
        return self.loadDestinations()

    def loadDestinations(self):
        """Download the destination list for this station and cache it.

        Every ``<a>`` element of the response becomes a Station whose sid is
        the ``sid2`` number in the link's href and whose name is the link
        text. The result is stored in ``Station.CACHE`` under ``self.sid``.
        """
        soup = BeautifulSoup(
            requests.get((
                "https://swrailway.gov.ua/timetable/eltrain3-7/"
                f"?AJ=sids2_list&lng=&sid={self.sid}"
            )).text,
            "html.parser"
        )
        items = [
            Station(
                sid=int(re.findall(r"sid2=(\d+)", link["href"])[0]),
                name=link.text)
            for link in soup.find_all("a")
        ]
        Station.CACHE[self.sid] = items
        return items

    @staticmethod
    def fromQuery(query):
        """Search stations by ``query``; returns an iterator of Station.

        The request is sent immediately; Station objects are built lazily
        while the returned ``map`` is consumed.
        """
        def processStation(data):
            # The second " | "-separated field of "info" is stored verbatim
            # as the direction; it is not converted to the numeric direction
            # IDs listed in __init__.
            direction = data["info"].split(" | ")[1]
            return Station(
                sid=data["id"],
                name=data["label"],
                direction=direction
            )

        # Pass the term via params so that requests URL-encodes it; a raw
        # "&" or "#" in the query would otherwise corrupt the URL.
        response = requests.get(
            Station._BASE_URL,
            params={"JSON": "station", "term": query})
        return map(processStation, json.loads(response.text))

    def __repr__(self):
        """Show every stored attribute except the internal withID flag."""
        shown = " ".join(
            f"{key}={value}"
            for key, value in self.__dict__.items()
            if key != "withID"
        )
        return f"<Station {shown}>"
import re
from objects import Station
class Time:
    """Clock time parsed from a timetable cell.

    Attributes:
        isEmpty: True when the cell held the "–" placeholder, not a time.
        hour, minute: integers parsed from "H:M"; both None when isEmpty.
    """

    # Placeholder used in place of a time.
    EMPTY_MARK = "–"

    def __init__(self, string):
        """Parse ``string``; surrounding whitespace (incl. NBSP) is ignored.

        Raises ValueError when the text is neither the placeholder nor two
        ":"-separated integers.
        """
        text = string.strip()
        self.isEmpty = text == Time.EMPTY_MARK
        if self.isEmpty:
            # Define the fields anyway so attribute access never fails.
            self.hour = self.minute = None
            return
        self.hour, self.minute = map(int, text.split(":"))

    def __repr__(self):
        if self.isEmpty:
            return "<Time (empty)>"
        return f"<Time hour={self.hour} minute={self.minute}>"
class Date:
    """Calendar date parsed from a "year-month-day" string."""

    def __init__(self, string):
        """Split ``string`` on "-" into exactly three integer fields."""
        year, month, day = (int(part) for part in string.split("-"))
        self.year = year
        self.month = month
        self.day = day

    def __repr__(self):
        return "<Date year={} month={} day={}>".format(
            self.year, self.month, self.day)
class Train:
    """Train number(s) parsed from a timetable cell.

    ``tid`` is a list of ints when the text is "/"-separated numbers
    (e.g. "6501/6502"), otherwise a single int taken from the first
    parenthesised number in the text (e.g. "Name (123)").
    """

    def __init__(self, string):
        """Parse ``string``; raises IndexError if no number can be found."""
        try:
            self.tid = [int(part) for part in string.split("/")]
        except ValueError:
            self.tid = int(re.findall(r"\((\d+)\)", string)[0])

    def __repr__(self):
        # tid may be a bare int (fallback branch of __init__); wrap it so the
        # join below works for both shapes instead of raising TypeError.
        numbers = self.tid if isinstance(self.tid, list) else [self.tid]
        return "<Train id=" + "/".join(str(n) for n in numbers) + ">"
class Regularity:
    """Days of the week on which a train runs.

    Attributes:
        everyDay: True when the text is exactly "щоденно".
        days: indexes into ``D_NAMES`` (0 = "пн" ... 6 = "нд"); holds all
            seven indexes when ``everyDay`` is True.
    """

    D_NAMES = ["пн", "вт", "ср", "чт", "пт", "сб", "нд"]

    def __init__(self, string):
        """Parse ``string``.

        Raises ValueError if a matched two-letter token is not in D_NAMES.
        """
        self.everyDay = string == "щоденно"
        if self.everyDay:
            # Keep `days` defined for daily trains too, so callers can read
            # it without checking everyDay first.
            self.days = list(range(len(Regularity.D_NAMES)))
            return
        # A day token is matched only when preceded by whitespace and
        # followed by one more character (normally "."), then optional ",".
        self.days = [
            Regularity.D_NAMES.index(found[1])
            for found in re.findall(r"(\s([а-я]{2}).\,?)", string)
        ]

    def __repr__(self):
        if self.everyDay:
            return "<Regularity = everyday>"
        return f"<Regularity on {str(self.days)}>"
class Route:
    """Pair of stations, shown as ``s1 -> s2``."""

    def __init__(self, station1, station2):
        self.s1, self.s2 = station1, station2

    @classmethod
    def fromStationsID(cls, sid1, sid2):
        """Build a route from two station IDs."""
        first = Station(sid=sid1)
        second = Station(sid=sid2)
        return cls(first, second)

    @classmethod
    def fromString(cls, string):
        """Build a route from a "name1 – name2" string (en dash separator)."""
        parts = string.split(" – ")
        first = Station(name=parts[0])
        second = Station(name=parts[1])
        return cls(first, second)

    def __repr__(self):
        return f"<Route {self.s1!s} -> {self.s2!s}>"
import requests
import re
from bs4 import BeautifulSoup
from primitives import Time, Date, Train, Regularity, Route, Station
class ScheduleItem:
    """One timetable row; its fields are whatever keywords were passed in."""

    def __init__(self, **kwargs):
        """Expecting such kwargs:
        train, regularity, route, dispatchArrival, dispatchDeparture,
        destinationArrival, destinationDeparture, time, rangeFrom, rangeTo
        """
        vars(self).update(kwargs)

    def __repr__(self):
        # Items either carry a route or, failing that, a station.
        try:
            subject = f"{self.route}"
        except AttributeError:
            subject = f"{self.station}"
        return f"<ScheduleItem on {subject}>"
class TripSchedule:
    """Timetable of trains between the two stations of a route."""

    # Cache is a dict whose key is the tuple (sid of station 1, sid of
    # station 2) and whose value is a list of ScheduleItem.
    CACHE = dict()

    def __init__(self, route):
        """Raises IDRequired unless both stations of ``route`` have an ID."""
        if not (route.s1.withID and route.s2.withID):
            raise IDRequired("ID is required for stations of route.")
        self.route = route

    @classmethod
    def fromStationsID(cls, sid1, sid2):
        """Alternate constructor taking the two station IDs directly."""
        route = Route.fromStationsID(sid1, sid2)
        return cls(route)

    @property
    def schedule(self):
        """Cached list of ScheduleItem, downloaded on first access."""
        key = (self.route.s1.sid, self.route.s2.sid)
        if key not in TripSchedule.CACHE:
            return self.loadSchedule()
        return TripSchedule.CACHE[key]

    def loadSchedule(self):
        """Download and parse the timetable page, then cache the result.

        Table and cell positions below are hard-coded indexes into the
        parsed page; every second row from the fifth one on is a train.
        """
        url = (
            "https://swrailway.gov.ua/timetable/eltrain3-7/"
            f"?sid1={self.route.s1.sid}&sid2={self.route.s2.sid}"
        )
        soup = BeautifulSoup(requests.get(url).text, "html.parser")
        holder = soup.contents[6].td.contents[1].contents[19]
        rows = holder.findAll("tr")[4::2]

        items = []
        for row in rows:
            cells = row.contents
            link = cells[1].contents[0]
            entry = ScheduleItem(
                train=Train(link.text),
                routeID=int(re.findall(r"tid=(\d+)", link["href"])[0]),
                regularity=Regularity(cells[3].text.rstrip("\xa0")),
                route=Route.fromString(cells[5].text.rstrip("\xa0")),
                dispatchArrival=Time(cells[7].text),
                dispatchDeparture=Time(cells[8].text),
                destinationArrival=Time(cells[9].text),
                destinationDeparture=Time(cells[10].text),
                time=Time(cells[11].text),
                rangeFrom=Date(cells[13].text.rstrip("\xa0")),
                rangeTo=Date(cells[14].text.rstrip("\xa0")),
            )
            items.append(entry)

        TripSchedule.CACHE[(self.route.s1.sid, self.route.s2.sid)] = items
        return items

    def __repr__(self):
        return "<TripSchedule on {!s}>".format(self.route)
class RouteSchedule:
    """Stops of a single train route, looked up by its ``tid`` number."""

    # Cache is a dict whose key is the route number and whose value is a
    # list of ScheduleItem.
    CACHE = dict()

    def __init__(self, tid):
        self.tid = tid

    def __repr__(self):
        return "<RouteSchedule id={}>".format(self.tid)

    @property
    def schedule(self):
        """Cached list of ScheduleItem, downloaded on first access."""
        if self.tid not in RouteSchedule.CACHE:
            return self.loadSchedule()
        return RouteSchedule.CACHE[self.tid]

    def loadSchedule(self):
        """Download and parse the route page, then cache the result.

        Table and cell positions below are hard-coded indexes into the
        parsed page; every second row from the seventh one on is a stop.
        """
        url = (
            "https://swrailway.gov.ua/timetable/eltrain3-7/"
            f"?tid={self.tid}"
        )
        soup = BeautifulSoup(requests.get(url).text, "html.parser")
        holder = soup.contents[6].td.contents[1].contents[8]
        rows = holder.findAll("tr")[6::2]

        items = []
        for row in rows:
            cells = row.contents
            link = cells[3].contents[1]
            stop = Station(
                sid=int(re.findall(r"sid=(\d+)", link["href"])[0]),
                name=link.text,
            )
            items.append(ScheduleItem(
                station=stop,
                arrival=Time(cells[5].text),
                departure=Time(cells[6].text),
            ))

        RouteSchedule.CACHE[self.tid] = items
        return items

    @classmethod
    def fromScheduleItem(cls, item):
        """Alternate constructor using the ``routeID`` of a ScheduleItem."""
        return cls(item.routeID)
class StationSchedule:
    """Timetable of trains calling at a single station."""

    # Cache is a dict whose key is the station ID and whose value is a list
    # of ScheduleItem.
    CACHE = dict()

    def __init__(self, station):
        """Raises IDRequired unless ``station`` was created with an ID."""
        if not station.withID:
            raise IDRequired("ID is required for station.")
        self.station = station

    @classmethod
    def fromStationID(cls, sid):
        """Alternate constructor taking the station ID directly."""
        # Station accepts keyword arguments only; passing sid positionally
        # raised TypeError.
        return cls(Station(sid=sid))

    @property
    def schedule(self):
        """Cached list of ScheduleItem, downloaded on first access."""
        if self.station.sid in StationSchedule.CACHE:
            return StationSchedule.CACHE[self.station.sid]
        return self.loadSchedule()

    def loadSchedule(self):
        """Download and parse the station page, then cache the result.

        Table and cell positions below are hard-coded indexes into the
        parsed page; every second row from the fifth one on is a train.
        """
        soup = BeautifulSoup(
            requests.get((
                "https://swrailway.gov.ua/timetable/eltrain3-7/"
                f"?sid1={self.station.sid}"
            )).text,
            "html.parser"
        )
        rows = soup \
            .contents[6] \
            .td \
            .contents[1] \
            .contents[15] \
            .find_all("tr")[4::2]
        items = [
            ScheduleItem(
                train=Train(row.contents[1].contents[0].text),
                regularity=Regularity(row.contents[3].text.rstrip("\xa0")),
                route=Route.fromString(row.contents[5].text),
                arrival=Time(row.contents[7].text),
                departure=Time(row.contents[9].text),
                rangeFrom=Date(row.contents[11].text.rstrip("\xa0")),
                rangeTo=Date(row.contents[12].text.rstrip("\xa0"))
            )
            for row in rows
        ]
        StationSchedule.CACHE[self.station.sid] = items
        return items

    def __repr__(self):
        return f"<StationSchedule for {self.station}>"
class IDRequired(Exception):
    """Raised when a schedule is requested for a station without an ID."""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment