Skip to content

Instantly share code, notes, and snippets.

@itog
Last active February 1, 2020 13:39
Show Gist options
  • Save itog/bd28168ef25649d083235f7f642912dc to your computer and use it in GitHub Desktop.
Save itog/bd28168ef25649d083235f7f642912dc to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import lxml.html
import time
import csv
def parse(url):
    """Fetch one property page and extract its listing fields.

    Args:
        url: Absolute URL of the property page to download.

    Returns:
        ``[name, description, address, url]`` when the server answers with
        HTTP 200 and all three XPath lookups match a node; ``None`` when the
        status is not 200 or any of the expected nodes is missing.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the server does not respond within the timeout.
    """
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None

    html = lxml.html.fromstring(response.content)
    try:
        # The absolute paths are tied to one specific page layout; a page
        # that deviates yields an empty result list and [0] raises.
        name = html.xpath(
            "/html/body/div/div[1]/div/main/div[2]/section[1]/div/div[1]/h1")[0].text
        description = html.xpath(
            "/html/body/div/div[1]/div/main/div[2]/section[2]/p/span[2]/text()")[0]
        address = html.xpath(
            "/html/body/div/div[1]/div/main/div[2]/section[5]/div/div[3]/text()")[0]
    except IndexError:
        # Report "nothing to record" the same way as a non-200 response so
        # one odd page does not abort the whole run.
        return None
    return [name, description, address, url]
# Number of property ids to visit; ids 1..num are appended to the base URL.
num = 210
url = "https://hafh.com/properties/"

# newline='' is what the csv module expects of files it writes to, and the
# with-block guarantees the file is flushed and closed even if the crawl
# stops part-way. An explicit encoding keeps the output independent of the
# machine's locale.
with open('hostel_list.csv', 'w', newline='', encoding='utf-8') as csv_file:
    hostel_list = csv.writer(csv_file)
    hostel_list.writerow(["name", "description", "address", "url"])
    for i in range(num):
        print(f"Checking {i} of {num}")
        # Fixed pause before every request (presumably to rate-limit the
        # crawl against the remote site).
        time.sleep(10)
        row = parse(url + str(i + 1))
        # parse() yields nothing for pages it could not read; skip those.
        if row:
            hostel_list.writerow(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment