Skip to content

Instantly share code, notes, and snippets.

@tanweerali
Last active March 3, 2025 06:49
Show Gist options
  • Select an option

  • Save tanweerali/f0f31c20ff9dbd3cceb4185f93158de8 to your computer and use it in GitHub Desktop.

Select an option

Save tanweerali/f0f31c20ff9dbd3cceb4185f93158de8 to your computer and use it in GitHub Desktop.
How to Scrape Tripadvisor Data Using ScrapingAnt's Web Scraping API in Python
import http.client
import os
import sys
from urllib.parse import quote

from bs4 import BeautifulSoup
# Scrape hotel names, review counts and detail-page links from Tripadvisor's
# Italy hotel listing, fetching every listing page through the ScrapingAnt API.
# Results accumulate across pages in Hotel_Name, Hotel_Review and Hotel_Link
# and are printed once all pages have been processed.

base_url = "https://www.tripadvisor.com/Hotels-g187768-Italy-Hotels.html"
template_url = "https://www.tripadvisor.com/Hotels-g187768-oa{}-Italy-Hotels.html"
num_pages = 2

# The first page has its own URL; every later page carries an "oa<offset>"
# segment, with the offset advancing in steps of 30.
urls = [base_url]
urls.extend(template_url.format(offset) for offset in range(30, num_pages * 30, 30))

# NOTE(review): this key was hard-coded in the original script and is exposed
# to anyone who can read the file. Set SCRAPINGANT_API_KEY in the environment
# instead; the literal is kept only as a fallback so existing runs still work.
api_key = os.environ.get("SCRAPINGANT_API_KEY", "68f2cb0e34564a9bb9fd2e93e84ff12d")

Hotel_Name = []
Hotel_Review = []
Hotel_Link = []

for url in urls:
    conn = http.client.HTTPSConnection("api.scrapingant.com")
    try:
        # The target URL travels inside the API's query string, so it has to
        # be percent-encoded; otherwise any "?", "&" or "#" it contains would
        # be interpreted as part of the API request itself.
        conn.request(
            "GET",
            "/v2/general?url={}&x-api-key={}".format(quote(url, safe=""), api_key),
        )
        res = conn.getresponse()
        status = res.status
        data = res.read()
    finally:
        # Always release the socket, even when the request fails.
        conn.close()

    if status != 200:
        # An error body is not a hotel listing; report it and move on rather
        # than silently parsing it into empty results.
        print("Skipping {}: HTTP {}".format(url, status), file=sys.stderr)
        continue

    soup = BeautifulSoup(data, "html.parser")

    # Each hotel card title holds both the display name and the link to the
    # hotel page, so one lookup serves both lists.
    for card in soup.find_all("div", attrs={"data-automation": "hotel-card-title"}):
        Hotel_Name.append(card.text)
        anchor = card.find("a")
        if anchor is not None and anchor.get("href"):
            Hotel_Link.append('https://www.tripadvisor.com' + anchor["href"])
        else:
            # Keep Hotel_Link index-aligned with Hotel_Name when a card has
            # no usable anchor.
            Hotel_Link.append(None)

    # Review counts are taken from aria-label text that mentions "reviews":
    # the token just before the last word, with thousands separators removed.
    for labelled in soup.find_all("div", attrs={"aria-label": True}):
        text = labelled["aria-label"]
        if "reviews" not in text:
            continue
        words = text.split()
        if len(words) >= 2:  # a one-word label has no preceding token
            Hotel_Review.append(words[-2].replace(",", ""))

print(Hotel_Name)
print(Hotel_Review)
print(Hotel_Link)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment