Skip to content

Instantly share code, notes, and snippets.

@Dviejopomata
Created February 16, 2018 16:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dviejopomata/ad89e8e5a6d8b15384a0febf70e966c8 to your computer and use it in GitHub Desktop.
Save Dviejopomata/ad89e8e5a6d8b15384a0febf70e966c8 to your computer and use it in GitHub Desktop.
class MatchSpider(scrapy.Spider):
    """Scrape betting prices for upcoming matches stored in ``bet365_match_mod``.

    Flow:
      1. ``start_requests`` opens the mobile site once per start URL, each with
         its own ``cookiejar`` key.
      2. ``parse_page`` reads the upcoming matches from the database and issues
         one request per (match, market category) pair.
      3. ``parse`` walks the returned markup and yields one dict per price cell.

    Relies on names defined outside this class: ``conn`` (database connection),
    ``get_url`` (URL builder), ``client`` (error reporter) and ``F`` (see the
    note in ``parse``).
    """

    name = 'match'
    # allowed_domains = ['mobile.bet365.es']

    def start_requests(self):
        """Yield the initial request(s), tagging each with a distinct cookiejar key."""
        start_urls = ['https://mobile.bet365.es']
        for i, url in enumerate(start_urls):
            yield scrapy.Request(
                url,
                meta={'cookiejar': i},
                headers={'host': 'mobile.bet365.es'},
                callback=self.parse_page,
            )

    def parse_page(self, response):
        """Yield one request per upcoming match and market category.

        The match list comes from the database, not from ``response``; the
        response is only used to carry its ``cookiejar`` key forward.
        """
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        try:
            cur.execute(
                """SELECT casa_equipo, fuera_equipo, liga , markettype, matchkey, sportkey
                FROM bet365_match_mod
                WHERE liga='España - 1ª División' AND partido_date > now() AND markettype IS NOT NULL AND matchkey IS NOT NULL ;""")
            # Read everything up front so the cursor is released even if the
            # consumer stops iterating this generator early or an error occurs.
            rows = cur.fetchall()
        finally:
            cur.close()

        # Category name -> code passed to get_url as its last argument.
        categorias = {
            'principales': '1',
            'asiaticos': '3',
            'tarjetas': '4',
            'corners': '5',
            'goles': '6',
            'mitades': '7',
            'jugadores': '8',
            'especiales': '9',
            'diezminutos': '2',
        }
        cookiejar = response.meta['cookiejar']

        for row in rows:
            partido = {'casa': row['casa_equipo'], 'fuera': row['fuera_equipo']}
            for categoria, codigo in categorias.items():
                req_url = get_url(row['markettype'], row['matchkey'], row['sportkey'], codigo)
                logging.info("Empezando a leer %s", req_url)
                meta = {
                    'cookiejar': cookiejar,
                    'categoria': categoria,
                    # 'proxy': "http://192.168.1.8:5566",
                    'partido': partido,
                }
                # dont_filter: bypass Scrapy's duplicate-request filter for these URLs.
                yield scrapy.Request(
                    req_url,
                    dont_filter=True,
                    headers={
                        'X-Requested-With': 'XMLHttpRequest',
                        'Host': 'mobile.bet365.es',
                        'Referer': 'https://mobile.bet365.es/default.aspx',
                    },
                    meta=meta,
                    callback=self.parse,
                )

    def parse(self, response):
        """Yield one dict per price cell found in a category response.

        Each yielded dict has the keys ``label``, ``opp``, ``odd``, ``title``,
        ``header_price``, ``categoria`` and ``metadata`` (holding the match
        passed through ``response.meta['partido']``).
        """
        partido = response.meta['partido']
        originalcategoria = response.meta['categoria']
        apuestas = response.css('.F')
        if not apuestas:
            msg = 'La categoria %s no tiene matchbetting' % originalcategoria
            # NOTE(review): assumes `client` is a raven-style client. `msg` is a
            # plain string and no exception is active here, so it is reported
            # as a message rather than through captureException.
            client.captureMessage(msg)
            logging.error(msg)
            return

        for apuesta in apuestas:
            # Column captions of this market, in document order (may be empty).
            headers_1 = [
                header.css('::text').extract_first(default='')
                for header in apuesta.css('.podHeaderRow').css('.priceColumn')
            ]
            # The title belongs to the market, not to the row, so look it up once.
            title = apuesta.css('h1>em::text').extract_first()

            # NOTE(review): `F` is not defined anywhere in this class. Unless the
            # module defines it, this line raises NameError. It was presumably
            # meant to be a selection of the event rows inside `apuesta` -
            # confirm against the page markup and replace.
            for event in F:
                label = ""
                wideleftcolumn = event.css('.wideLeftColumn')
                if wideleftcolumn:
                    label = wideleftcolumn.css('::text').extract_first(default='')

                for idx, price in enumerate(event.css('.priceColumn')):
                    classes = price.css('::attr(class)').extract_first(default='').split()
                    is_qtr = "qtr" in classes

                    header_price = ""
                    if headers_1:
                        # For "qtr" cells two consecutive cells share one caption,
                        # so cell idx maps to caption idx // 2.
                        header_idx = idx // 2 if is_qtr else idx
                        # A row with more cells than captions gets an empty
                        # caption instead of aborting the whole response.
                        if header_idx < len(headers_1):
                            header_price = headers_1[header_idx]

                    opp_sel = price.css('.opp')
                    if opp_sel:
                        opp = opp_sel.css('::text').extract_first(default='')
                        odd = price.css('.odds::text').extract_first(default='')
                    elif is_qtr:
                        opp = price.css('.handicap::text').extract_first(default='')
                        # The cell text holds handicap and odd together; remove
                        # the handicap to leave only the odd.
                        odd = "".join(price.css('::text').extract()).replace(opp, "").strip()
                    else:
                        opp = ""
                        odd = price.css('::text').extract_first(default='')

                    yield {
                        'label': label,
                        'opp': opp,
                        'odd': odd,
                        'title': title,
                        'header_price': header_price,
                        'categoria': originalcategoria,
                        'metadata': {'partido': partido},
                    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment