Skip to content

Instantly share code, notes, and snippets.

View ThibaudLamothe's full-sized avatar
👨‍💻

Thib ThibaudLamothe

👨‍💻
View GitHub Profile
[
{
"single": "14.56",
"avg": "15.61",
"round": "Final",
"competition": "Kingscube Toulouse Open 2014"
},
{
"single": "13.50",
"avg": "13.70",
import json
import requests
# Build the crawl endpoint URL: base address, then the query string.
endpoint = 'http://localhost:9080/crawl.json'
query = '&'.join(['start_requests=1', 'spider_name=wca'])
url = endpoint + '?' + query

# Fetch the endpoint and decode the JSON body of the reply.
response = requests.get(url)
data = json.loads(response.content)
class WcaSpider(scrapy.Spider):
    """Spider named 'wca' that starts from a single WCA person page."""

    name = 'wca'
    allowed_domains = ['worldcubeassociation.org']
    start_urls = ['https://www.worldcubeassociation.org/persons/2012LAMO01']

    def parse(self, response):
        """Select the result rows from the response.

        The rows are those matching the ``tbody.event-333 tr.result``
        selector inside the ``div.results-by-event`` table.
        """
        # Identify all rows from the desired table
        # NOTE(review): the selected rows are not used in the visible code.
        result_rows = response.css('div.results-by-event table tbody.event-333 tr.result')
import scrapy
class WcaSpider(scrapy.Spider):
    """Bare spider named 'wca', restricted to worldcubeassociation.org."""

    name = 'wca'
    start_urls = ['http://worldcubeassociation.org/']
    allowed_domains = ['worldcubeassociation.org']

    def parse(self, response):
        """Do nothing with the response (placeholder callback)."""
class airbnbSpider(scrapy.Spider):
    """Spider that starts from a fixed list of city URLs and keeps two counters."""

    name = "airbnbSpider"

    def __init__(self, *args, **kwargs):
        """Initialise the base spider, then set the start URLs and trackers.

        Args:
            *args: Positional arguments forwarded to ``scrapy.Spider.__init__``.
            **kwargs: Keyword arguments (e.g. spider arguments) forwarded to
                ``scrapy.Spider.__init__``.
        """
        # Run the base-class initialisation and accept whatever arguments the
        # framework passes when it instantiates the spider.
        super().__init__(*args, **kwargs)

        # Urls of the cities to scrape
        # NOTE(review): url_city_1 / url_city_2 are not defined in the visible
        # code; they must be provided elsewhere in the module.
        self.start_urls = [url_city_1, url_city_2]

        # Trackers
        self.page = 0
        self.object = 0
def parse_hotel(self, response):
# Get hotel informations
info_1 = response.css('info_1_selector')
info_2 = response.css('info_2_selector')
# ...
yield {
"info_1":info_1,
"info_2":info_2
def parse(self, response):
# Getting the hotels list
hotels = response.css('hotels_selector')
# Following hotels pages
for hotel in hotels:
info_1 = hotel.css('info_1_selector')
info_2 = hotel.css('info_2_selector')
yield {
def parse(self, response):
    """Follow every hotel link selected on the page.

    Args:
        response: Page response; must provide ``css`` and ``follow``.

    Yields:
        The result of ``response.follow`` for each selected link, with
        ``self.parse_hotel`` as the callback.
    """
    # Getting the hotels list
    hotel_links = response.css('hotel_selector')

    # Following hotels pages: ``link`` is the loop variable, so the value
    # handed to ``follow`` is always the element currently being iterated.
    for link in hotel_links:
        yield response.follow(url=link, callback=self.parse_hotel)

    # Get Next Page of hotels
    # NOTE(review): no pagination step is present in the visible code.
def prepare_request(city, checkin=None, checkout=None, price_min=None, price_max=None, currency='USD'):
    """Build the Airbnb search URL for a city.

    Args:
        city: City identifier, inserted as-is into the URL path.
        checkin: Check-in date as a 'YYYY-MM-DD' string.
        checkout: Check-out date as a 'YYYY-MM-DD' string.
        price_min: Lower price bound; ``0`` is a valid bound.
        price_max: Upper price bound.
        currency: Currency code sent as ``display_currency`` together with
            the price bounds.

    Returns:
        The search URL. The dates are added only when both are given, and
        the price filter (with its currency) only when both bounds are given.
    """
    from urllib.parse import urlencode

    def _is_set(value):
        # None and '' mean "not provided"; 0 is a legitimate price bound.
        return value is not None and value != ''

    params = {}
    if checkin and checkout:
        params['checkin'] = checkin
        params['checkout'] = checkout
    if _is_set(price_min) and _is_set(price_max):
        params['price_min'] = price_min
        params['price_max'] = price_max
        params['display_currency'] = currency

    # urlencode joins the pairs with '&' and escapes the values, so the query
    # never begins with a stray '&' right after the '?'.
    return f'https://www.airbnb.com/s/{city}/homes/?' + urlencode(params)
# Collect every text node found under the price container of the listing.
price_nodes = annonce.css(' div._ls0e43 ::text')
price_info = price_nodes.extract()
['Price:', '$140', ' / night', 'Price:', '$974 total', 'Show details']