Skip to content

Instantly share code, notes, and snippets.

@ThibaudLamothe
Created February 1, 2021 18:12
Show Gist options
  • Save ThibaudLamothe/c8dabb22e930916057083047fed5fbc1 to your computer and use it in GitHub Desktop.
Save ThibaudLamothe/c8dabb22e930916057083047fed5fbc1 to your computer and use it in GitHub Desktop.
class WcaSpider(scrapy.Spider):
    """Spider that scrapes result rows from a WCA competitor profile page.

    It requests the single profile URL listed in ``start_urls`` and yields
    one dict per result row found under ``tbody.event-333`` (presumably the
    3x3x3 event table -- confirm against the page markup).
    """

    name = 'wca'
    start_urls = ['https://www.worldcubeassociation.org/persons/2012LAMO01']
    allowed_domains = ['worldcubeassociation.org']

    @staticmethod
    def _strip_or_none(text):
        """Return ``text`` stripped of surrounding whitespace, or None if it is None."""
        return text.strip() if text is not None else None

    def parse(self, response):
        """Yield one item per result row of the selected event table.

        Args:
            response: The downloaded profile page handed over by Scrapy.

        Yields:
            dict: Keys ``single``, ``avg``, ``round`` and ``competition``.
            A value is ``None`` when the matching cell holds no text.
        """
        # Identify all rows from the desired table
        rows = response.css('div.results-by-event table tbody.event-333 tr.result')

        # For each row yield an item
        for row in rows:
            # extract_first() returns None when nothing matches, so cells
            # without a text node (e.g. an empty average) must not be
            # stripped unconditionally -- that raised AttributeError and
            # aborted the whole callback.
            single = row.css('td.single::text').extract_first()
            average = row.css('td.average::text').extract_first()

            yield {
                'single': self._strip_or_none(single),
                'avg': self._strip_or_none(average),
                'round': row.css('td.round ::text').extract_first(),
                # None when the row has no competition link.
                'competition': row.css('td.competition a::text').extract_first(),
            }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment