Skip to content

Instantly share code, notes, and snippets.

@sylwke3100
Last active August 29, 2015 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sylwke3100/1daa5d17253ddd378588 to your computer and use it in GitHub Desktop.
Save sylwke3100/1daa5d17253ddd378588 to your computer and use it in GitHub Desktop.
from gevent import monkey
monkey.patch_all()
import lxml.html
import sqlite3
import requests
import re
import grequests
import gevent
import gevent.monkey
import threading
import requests
import requests.exceptions
from time import sleep
from gevent.pool import Pool
s = requests.Session()
fd = open('Shemat.sql', 'r')
script = fd.read()
baseHandler = sqlite3.connect('NewDB.db')
baseHandler.executescript(script)
def fetch(url):
r = requests.get(
"http://rozklady.kzkgop.pl/" + url)
if r.status_code == 200:
return r.content
else:
fetch(url)
linesType = {
'l_zwykly': 'Bus',
'l_tram': 'Tram',
'l_bezp': 'Free'}
def getRoutesList():
rawPageCites = fetch("index.php?co=rozklady&submenu=linie_szczegolowo")
rawDataCites = lxml.html.fromstring(rawPageCites)
for data in rawDataCites.cssselect("table#tabelka_szczegoly_linii td.td_linia span.l_tram"):
spanData = data[0]
busName = data.text_content()
busType = linesType[data.attrib["class"]]
if busType == "Tram":
busId = baseHandler.execute("SELECT * FROM buses WHERE name='"+busName+"'").fetchone()
if busId == None:
print busName + " "+ busType
baseHandler.execute("INSERT INTO buses VALUES (NULL, '"+ busName+"', '"+ busType+"')")
busId = [getId('buses', busName)]
getBusInfo(data.cssselect("a")[0].attrib['href'], busId[0], busType)
baseHandler.commit()
def getBusInfo(link, idBus, Type):
infoBusRawPage = fetch(link)
infoBusRawData = lxml.html.fromstring(infoBusRawPage)
citesRawData = None
cities = {}
ignoreBus = 0
try:
citesRawData = infoBusRawData.cssselect("div#dolne_info table#trasy_legenda")[0]
except IndexError:
try:
links = infoBusRawData.cssselect("div#div_tabelki_tras ul li a")[0]
infoBusRawPage = fetch(links.attrib['href'])
infoBusRawData = lxml.html.fromstring(infoBusRawPage)
citesRawData = infoBusRawData.cssselect("div#dolne_info table#trasy_legenda")[0]
except IndexError:
ignoreBus = 1
if ignoreBus != 1:
checkCity = 0
cityId = ''
for listD in citesRawData.cssselect("tr td"):
try:
wyw = listD.attrib['class']
wywFind = wyw.find('gmina', 0)
if wywFind != -1:
cityId = wyw[wywFind:]
checkCity = 1
except (KeyError or IndexError):
if checkCity == 1:
cities[cityId] = listD.text_content()
checkCity = 0
else:
break
listRoutes = []
cs = re.compile('(nr_linii=[A-Za-z0-9]+)|(przyst=[0-9]+)|(id_trasy=[0-9]+)')
for infoData in infoBusRawData.cssselect("div#div_tabelki_tras div"):
try:
idName = infoData.attrib["id"]
linkDb = []
if idName in ['prawo', 'lewo', 'srodek']:
index = ['prawo', 'lewo', 'srodek'].index(idName)
for stopL in infoBusRawData.cssselect("div#"+idName+" tr"):
if stopL.attrib['class'] != 'tr_kierunek':
tdBase = stopL.cssselect('td')
cityBase = tdBase[0].attrib['class']
cityName = cities[cityBase[cityBase.find('gmina', 0):]]
stopName = tdBase[1].text_content()
link = stopL.cssselect('td > a')[0].attrib['href']
linkDb.append(['http://77.252.189.162/'+link, index])
cityId = getId('cities', cityName)
if baseHandler.execute('SELECT * FROM stops WHERE cityId='+str(cityId)+' AND name="'+stopName+'" LIMIT 1').fetchone() == None:
baseHandler.execute('INSERT INTO stops VALUES (NULL, '+str(cityId)+', "'+stopName+'")')
for lk in linkDb:
getRoutes(lk[0], idBus, lk[1])
else:
pass
except KeyError:
pass
else:
ignoreBus = 0
typesAll = {}
def getId(typedata, name):
queryType = None
try:
queryType = typesAll[typedata][name]
except KeyError:
current = baseHandler.execute('SELECT id FROM '+typedata+' WHERE name="'+name+'"').fetchone()
if current == None:
baseHandler.execute('INSERT INTO '+typedata+' VALUES (NULL, "'+name+'")')
current = baseHandler.execute('SELECT id FROM '+typedata+' WHERE name="'+name+'"').fetchone()
typesAll[typedata] = {}
typesAll[typedata][name] = current[0]
return typesAll[typedata][name]
routesId = {}
def getRouteId(Id):
try:
return routesId[Id]
except KeyError:
queryLink = baseHandler.execute('SELECT id FROM timetableRoutes WHERE routeId="'+str(Id)+'"').fetchone()
if queryLink == None:
return None
else:
routesId[Id] = queryLink[0]
return True
def f(x):
ret = None
try:
ret = s.get(x[0])
except requests.exceptions.ConnectionError:
sleep(2)
ret = s.get(x[0])
routeId = x[1]
currentType = x[2]
queryLink = getRouteId(routeId)
if queryLink == None:
getSingleRoute(ret.content, x[3], routeId, getId('timeTableType', currentType), x[4], x[5])
return 1
def getRoutes(link, bus, dire):
res = s.get(link)
ids = re.findall(ur'id_kursu=(.+?)&.+?class="(.+?)&?".<sup class="typ_(.*?)"', res.text)
base_url = 'http://77.252.189.162/rozklad/pokaz_kurs.php?id_kursu=%s'
urls = [[base_url % id_[0], id_[0], id_[1], bus, dire, id_[2]] for id_ in ids]
p = Pool(25)
p.map(f , urls)
def getSingleRoute(data, bus, routeId, currentType, dire, lowFlor):
calcStopsInRoute = 0
for stop in re.findall(ur'\);">(.+?)</a>.+?(\d{1,2}:\d{2})', data):
calcStopsInRoute += 1
sTime = (stop[1]).split(':')
calcTime = (int(sTime[0])*3600) + (int(sTime[1])*60)
nameStop = stop[0]
stopId = getId('stops', nameStop)
baseHandler.execute('INSERT INTO timetable VALUES (NULL,'+str(stopId)+','+str(routeId)+','+str(calcStopsInRoute)+','+str(calcTime)+') ')
baseHandler.execute('INSERT INTO timetableRoutes VALUES (NULL, '+str(routeId)+', '+str(currentType)+','+str(bus)+','+str(dire)+','+str(lowFlor)+')')
pass
getRoutesList()
CREATE TABLE "buses" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator pojazdu
"name" TEXT NOT NULL, --nazwa pojazdu
"type" TEXT NOT NULL --typ pojazdu (dla skryptu tylko tramwaje)
);
CREATE TABLE "cities" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator miasta
"name" TEXT --nazwa miasta
);
CREATE TABLE "stops" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator przystanku
"cityId" INTEGER, --identyfikator miasta w którym jest przystanek
"name" TEXT --nazwa przystanku
);
CREATE TABLE "timetable" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator elementu rozkładu
"stopId" INTEGER, --identyfikator przystnaku na rozkładzie
"routeId" INTEGER, --identyfikator rozkładu (według strony KZKGOPu
"countTable" INTEGER, --numer przystanku na rozkładzie
"rTime" INTEGER --czas przystanku w sekundach
);
CREATE TABLE "timetableRoutes" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator trasy rozkładu
"routeId" INTEGER, --identyfikator rozkładu (według strony KZKGOPu
"timeTableTypeId" INTEGER, --identyfiaktor typu rozkładu (baza niżej)
"busId" INTEGER, --identyfikator busa dla rozkładu
"routeDirection" INTEGER, --kierunek rozkładu 0 - prawo, 1 - lewo, 2 - środek (odpowiadanią temu co na stronie kzkgopu środek oznacza trasy okreżne)
"lowFloor" INTEGER --Kurs niskopodłogowy (1 - tak, 0 - nie)
);
CREATE TABLE "timeTableType"(
"id" INTEGER PRIMARY KEY AUTOINCREMENT, --identyfikator typu rozkładu
"name" TEXT --nazwa typu rozkładu (np Robocze, Wolne itp)
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment