-
-
Save sylwke3100/1daa5d17253ddd378588 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from gevent import monkey | |
monkey.patch_all() | |
import lxml.html | |
import sqlite3 | |
import requests | |
import re | |
import grequests | |
import gevent | |
import gevent.monkey | |
import threading | |
import requests | |
import requests.exceptions | |
from time import sleep | |
from gevent.pool import Pool | |
s = requests.Session() | |
fd = open('Shemat.sql', 'r') | |
script = fd.read() | |
baseHandler = sqlite3.connect('NewDB.db') | |
baseHandler.executescript(script) | |
def fetch(url): | |
r = requests.get( | |
"http://rozklady.kzkgop.pl/" + url) | |
if r.status_code == 200: | |
return r.content | |
else: | |
fetch(url) | |
linesType = { | |
'l_zwykly': 'Bus', | |
'l_tram': 'Tram', | |
'l_bezp': 'Free'} | |
def getRoutesList(): | |
rawPageCites = fetch("index.php?co=rozklady&submenu=linie_szczegolowo") | |
rawDataCites = lxml.html.fromstring(rawPageCites) | |
for data in rawDataCites.cssselect("table#tabelka_szczegoly_linii td.td_linia span.l_tram"): | |
spanData = data[0] | |
busName = data.text_content() | |
busType = linesType[data.attrib["class"]] | |
if busType == "Tram": | |
busId = baseHandler.execute("SELECT * FROM buses WHERE name='"+busName+"'").fetchone() | |
if busId == None: | |
print busName + " "+ busType | |
baseHandler.execute("INSERT INTO buses VALUES (NULL, '"+ busName+"', '"+ busType+"')") | |
busId = [getId('buses', busName)] | |
getBusInfo(data.cssselect("a")[0].attrib['href'], busId[0], busType) | |
baseHandler.commit() | |
def getBusInfo(link, idBus, Type): | |
infoBusRawPage = fetch(link) | |
infoBusRawData = lxml.html.fromstring(infoBusRawPage) | |
citesRawData = None | |
cities = {} | |
ignoreBus = 0 | |
try: | |
citesRawData = infoBusRawData.cssselect("div#dolne_info table#trasy_legenda")[0] | |
except IndexError: | |
try: | |
links = infoBusRawData.cssselect("div#div_tabelki_tras ul li a")[0] | |
infoBusRawPage = fetch(links.attrib['href']) | |
infoBusRawData = lxml.html.fromstring(infoBusRawPage) | |
citesRawData = infoBusRawData.cssselect("div#dolne_info table#trasy_legenda")[0] | |
except IndexError: | |
ignoreBus = 1 | |
if ignoreBus != 1: | |
checkCity = 0 | |
cityId = '' | |
for listD in citesRawData.cssselect("tr td"): | |
try: | |
wyw = listD.attrib['class'] | |
wywFind = wyw.find('gmina', 0) | |
if wywFind != -1: | |
cityId = wyw[wywFind:] | |
checkCity = 1 | |
except (KeyError or IndexError): | |
if checkCity == 1: | |
cities[cityId] = listD.text_content() | |
checkCity = 0 | |
else: | |
break | |
listRoutes = [] | |
cs = re.compile('(nr_linii=[A-Za-z0-9]+)|(przyst=[0-9]+)|(id_trasy=[0-9]+)') | |
for infoData in infoBusRawData.cssselect("div#div_tabelki_tras div"): | |
try: | |
idName = infoData.attrib["id"] | |
linkDb = [] | |
if idName in ['prawo', 'lewo', 'srodek']: | |
index = ['prawo', 'lewo', 'srodek'].index(idName) | |
for stopL in infoBusRawData.cssselect("div#"+idName+" tr"): | |
if stopL.attrib['class'] != 'tr_kierunek': | |
tdBase = stopL.cssselect('td') | |
cityBase = tdBase[0].attrib['class'] | |
cityName = cities[cityBase[cityBase.find('gmina', 0):]] | |
stopName = tdBase[1].text_content() | |
link = stopL.cssselect('td > a')[0].attrib['href'] | |
linkDb.append(['http://77.252.189.162/'+link, index]) | |
cityId = getId('cities', cityName) | |
if baseHandler.execute('SELECT * FROM stops WHERE cityId='+str(cityId)+' AND name="'+stopName+'" LIMIT 1').fetchone() == None: | |
baseHandler.execute('INSERT INTO stops VALUES (NULL, '+str(cityId)+', "'+stopName+'")') | |
for lk in linkDb: | |
getRoutes(lk[0], idBus, lk[1]) | |
else: | |
pass | |
except KeyError: | |
pass | |
else: | |
ignoreBus = 0 | |
typesAll = {} | |
def getId(typedata, name): | |
queryType = None | |
try: | |
queryType = typesAll[typedata][name] | |
except KeyError: | |
current = baseHandler.execute('SELECT id FROM '+typedata+' WHERE name="'+name+'"').fetchone() | |
if current == None: | |
baseHandler.execute('INSERT INTO '+typedata+' VALUES (NULL, "'+name+'")') | |
current = baseHandler.execute('SELECT id FROM '+typedata+' WHERE name="'+name+'"').fetchone() | |
typesAll[typedata] = {} | |
typesAll[typedata][name] = current[0] | |
return typesAll[typedata][name] | |
routesId = {} | |
def getRouteId(Id): | |
try: | |
return routesId[Id] | |
except KeyError: | |
queryLink = baseHandler.execute('SELECT id FROM timetableRoutes WHERE routeId="'+str(Id)+'"').fetchone() | |
if queryLink == None: | |
return None | |
else: | |
routesId[Id] = queryLink[0] | |
return True | |
def f(x): | |
ret = None | |
try: | |
ret = s.get(x[0]) | |
except requests.exceptions.ConnectionError: | |
sleep(2) | |
ret = s.get(x[0]) | |
routeId = x[1] | |
currentType = x[2] | |
queryLink = getRouteId(routeId) | |
if queryLink == None: | |
getSingleRoute(ret.content, x[3], routeId, getId('timeTableType', currentType), x[4], x[5]) | |
return 1 | |
def getRoutes(link, bus, dire): | |
res = s.get(link) | |
ids = re.findall(ur'id_kursu=(.+?)&.+?class="(.+?)&?".<sup class="typ_(.*?)"', res.text) | |
base_url = 'http://77.252.189.162/rozklad/pokaz_kurs.php?id_kursu=%s' | |
urls = [[base_url % id_[0], id_[0], id_[1], bus, dire, id_[2]] for id_ in ids] | |
p = Pool(25) | |
p.map(f , urls) | |
def getSingleRoute(data, bus, routeId, currentType, dire, lowFlor): | |
calcStopsInRoute = 0 | |
for stop in re.findall(ur'\);">(.+?)</a>.+?(\d{1,2}:\d{2})', data): | |
calcStopsInRoute += 1 | |
sTime = (stop[1]).split(':') | |
calcTime = (int(sTime[0])*3600) + (int(sTime[1])*60) | |
nameStop = stop[0] | |
stopId = getId('stops', nameStop) | |
baseHandler.execute('INSERT INTO timetable VALUES (NULL,'+str(stopId)+','+str(routeId)+','+str(calcStopsInRoute)+','+str(calcTime)+') ') | |
baseHandler.execute('INSERT INTO timetableRoutes VALUES (NULL, '+str(routeId)+', '+str(currentType)+','+str(bus)+','+str(dire)+','+str(lowFlor)+')') | |
pass | |
getRoutesList() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE "buses" ( | |
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator pojazdu | |
"name" TEXT NOT NULL, --nazwa pojazdu | |
"type" TEXT NOT NULL --typ pojazdu (dla skryptu tylko tramwaje) | |
); | |
CREATE TABLE "cities" ( | |
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator miasta | |
"name" TEXT --nazwa miasta | |
); | |
CREATE TABLE "stops" ( | |
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator przystanku | |
"cityId" INTEGER, --identyfikator miasta w którym jest przystanek | |
"name" TEXT --nazwa przystanku | |
); | |
CREATE TABLE "timetable" ( | |
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator elementu rozkładu | |
"stopId" INTEGER, --identyfikator przystnaku na rozkładzie | |
"routeId" INTEGER, --identyfikator rozkładu (według strony KZKGOPu | |
"countTable" INTEGER, --numer przystanku na rozkładzie | |
"rTime" INTEGER --czas przystanku w sekundach | |
); | |
CREATE TABLE "timetableRoutes" ( | |
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator trasy rozkładu | |
"routeId" INTEGER, --identyfikator rozkładu (według strony KZKGOPu | |
"timeTableTypeId" INTEGER, --identyfiaktor typu rozkładu (baza niżej) | |
"busId" INTEGER, --identyfikator busa dla rozkładu | |
"routeDirection" INTEGER, --kierunek rozkładu 0 - prawo, 1 - lewo, 2 - środek (odpowiadanią temu co na stronie kzkgopu środek oznacza trasy okreżne) | |
"lowFloor" INTEGER --Kurs niskopodłogowy (1 - tak, 0 - nie) | |
); | |
CREATE TABLE "timeTableType"( | |
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator typu rozkładu | |
"name" TEXT --nazwa typu rozkładu (np Robocze, Wolne itp) | |
); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment