Skip to content

Instantly share code, notes, and snippets.

@pierrelorioux
Last active August 29, 2015 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pierrelorioux/71f16efa8968f69c76bc to your computer and use it in GitHub Desktop.
Save pierrelorioux/71f16efa8968f69c76bc to your computer and use it in GitHub Desktop.
Airbnb scraping - Washington (pages 1 to 56)
# coding=utf-8
from bs4 import BeautifulSoup
import csv
import requests
# Scrape the Airbnb search-result pages for Washington and export one CSV row
# per listing to washington.csv (columns: flat, url, lat, lon, usr,
# price_night; ';' as delimiter). Each exported row is also echoed to stdout.

# Search URL without the page number; the page number is appended per request.
SEARCH_URL = "https://www.airbnb.fr/s/Washington--District-de-Columbia--États~Unis?ss_id=6ai4vew1&page="
SITE_ROOT = "https://www.airbnb.fr"
FIRST_PAGE = 1
LAST_PAGE = 56  # inclusive
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled request blocks forever
PRICE_LABEL_CLASS = 'panel-overlay-bottom-left panel-overlay-label panel-overlay-listing-label'
PRICE_CLASS = 'h3 price-amount'

with open('washington.csv', 'w') as csvfile:
    fieldnames = ['flat', 'url', 'lat', 'lon', 'usr', 'price_night']
    writer = csv.DictWriter(csvfile, delimiter=';', fieldnames=fieldnames)
    writer.writeheader()

    # The previous version built the URL as "...&page=1" + str(i), which
    # requested pages 11, 12, ... instead of 1, 2, ..., and range(1, 56)
    # stopped one page short of the last one.
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        response = requests.get(SEARCH_URL + str(page), timeout=REQUEST_TIMEOUT)
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(response.text, 'html.parser')

        for div in soup.find_all('div'):
            lat = div.get('data-lat')
            if lat is None:
                # Only divs carrying a data-lat attribute are exported.
                continue

            # Missing attributes become empty strings so that building the
            # output line below cannot fail on None.
            lng = div.get('data-lng') or ''
            usr = div.get('data-user') or ''
            url = div.get('data-url') or ''
            name = div.get('data-name') or ''

            # Look the price up for THIS listing only. Previously the price
            # variable was assigned only when a label was found, so a listing
            # without one reused the previous listing's price (or raised
            # NameError if no label had been seen yet).
            # NOTE(review): under Python 2 the csv module cannot write
            # non-ASCII text; if the price text can contain a non-ASCII
            # currency sign, normalise it the same way as the name below.
            price = ''
            label = div.find_next('div', attrs={'class': PRICE_LABEL_CLASS})
            if label is not None:
                price_tag = label.find('span', attrs={'class': PRICE_CLASS})
                if price_tag is not None:
                    price = price_tag.get_text(strip=True)

            # Non-ASCII characters are dropped from the name written to the
            # CSV, as before; decoding back yields text rather than a bytes
            # object (which Python 3 would write as b'...').
            flat = name.encode('ascii', 'ignore').decode('ascii')
            complete_url = SITE_ROOT + url

            writer.writerow({
                'flat': flat,
                'url': complete_url,
                'lat': lat,
                'lon': lng,
                'usr': usr,
                'price_night': price,
            })
            # Single-argument print() behaves the same under Python 2 and 3.
            print(';'.join([name, complete_url, lat, lng, usr, price]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment