Skip to content

Instantly share code, notes, and snippets.

Created June 14, 2020 20:54
Show Gist options
  • Save BHushanRathod/d7942229914b04dfa7fb076efda011fb to your computer and use it in GitHub Desktop.
Save BHushanRathod/d7942229914b04dfa7fb076efda011fb to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
from selenium import webdriver
import random
import time
# Report file that the parsing functions in this script write their
# "<label>: \t<value>" lines to.  The encoding is pinned to UTF-8 so that
# non-ASCII hotel or room names cannot fail on platforms whose default
# text encoding is narrower.
file1 = open("Hotel_List.txt", "w", encoding="utf-8")

# Browser-like request headers.  In the code shown, only the 'User-Agent'
# entry is touched again (it is replaced by a random pick below); nothing
# visible here passes this dict to the Selenium driver.
headers = {
    'Accept': 'text/javascript, text/html, application/xml, text/xml, */*',
    'Accept-Encoding': 'gzip,deflate',
    'Accept-Language': 'en-US,en;q=0.5',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
    'Pragma': 'no-cache',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:28.0) Gecko/20100101 Firefox/28.0',
    'X-Requested-With': 'XMLHttpRequest',
}

# Pool of user-agent strings; one is picked at random per run.
UAS = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
    "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0',
)

ua = random.choice(UAS)
headers.update({'User-Agent': ua})

# Chrome session driven by get_data(); the argument is the location of the
# local chromedriver executable.
# NOTE(review): passing the driver path positionally is the Selenium 3
# calling convention - confirm against the installed Selenium version.
driver = webdriver.Chrome('/Users/bhushan/Downloads/chromedriver')

# NOTE(review): not referenced anywhere else in the code shown.
all_url = []
def get_hotel_name(soup):
    """Print the hotel name(s) found in *soup* and record them in the report.

    Every ``<h1 class="detail-baseinfo_name">`` element is treated as a hotel
    name: its whitespace-stripped text is echoed to stdout and written to the
    module-level ``file1`` handle as a ``Hotel Name: \\t<name>`` line.

    Args:
        soup: Parsed page; any object exposing BeautifulSoup's ``find_all``.

    Returns:
        None. The function works purely through its output side effects.
    """
    for heading in soup.find_all('h1', {'class': 'detail-baseinfo_name'}):
        name = heading.get_text().strip()
        print("Hotel Name: ", name)
        file1.write("Hotel Name: \t%s\n" % name)
    # NOTE(review): the original indentation was lost; emitting the divider
    # once per call (rather than once per heading) is an assumption.
    print("~" * 50)
# Fields reported for every sale card, in output order.  Each entry is
# (label, lookup path), where the path is a chain of (tag, class) searches
# applied one inside the other, starting from the sale card.
_SALE_CARD_FIELDS = (
    ("Amenities", (('div', 'salecard-bedfacility'),
                   ('div', 'facility'),
                   ('span', 'desc-text underline'))),
    ("Bed Type", (('div', 'bed'),
                  ('div', 'bed-content'),
                  ('span', 'underline'))),
    ("Price", (('div', 'salecard-price'),
               ('div', 'salecard-price-panel'),
               ('div', 'note'))),
)


def _walk(node, steps):
    """Yield the elements reached by applying each (tag, class) search in turn."""
    if not steps:
        yield node
        return
    tag, css_class = steps[0]
    for child in node.find_all(tag, {'class': css_class}):
        yield from _walk(child, steps[1:])


def _record(label, text):
    """Echo one ``label: value`` pair to stdout and append it to the report file."""
    print("%s: " % label, text)
    file1.write("%s: \t%s\n" % (label, text))


def get_room_details(soup):
    """Print and record all room details of a specific hotel page.

    For every ``div.roomlist-baseroom-card`` in *soup* the room name(s) are
    reported first; then, for each ``div.salecard-flex`` inside a
    ``div.roomcard``, the amenities, bed types and prices are reported in
    that order.  Each value goes to stdout and to the module-level ``file1``
    handle.

    Args:
        soup: Parsed page; any object exposing BeautifulSoup's ``find_all``.

    Returns:
        None. The function works purely through its output side effects.
    """
    for card in soup.find_all('div', {'class': 'roomlist-baseroom-card'}):
        for room_name in card.find_all('div', {'class': 'roomname'}):
            _record("Room Name", room_name.get_text().strip())

        sale_cards = _walk(card, (('div', 'roomcard'), ('div', 'salecard-flex')))
        for sale_card in sale_cards:
            for label, steps in _SALE_CARD_FIELDS:
                for element in _walk(sale_card, steps):
                    _record(label, element.get_text().strip())

        # NOTE(review): the original indentation was lost; printing the
        # divider after each room card is an assumption.
        print('~' * 100)
def get_data():
    """Collect hotel data for a given check-in date, check-out date, city and number of people.

    Gathers the hotel title elements (class ``list-card-title``) from the
    page currently loaded in the module-level ``driver``, then parses the
    driver's page source once per collected hotel and hands the result to
    ``get_hotel_name`` and ``get_room_details``.

    NOTE(review): this copy of the function is incomplete.  The statements
    that load ``url`` into the browser and that navigate to each hotel's own
    page are not present and have deliberately not been guessed; restore
    them before relying on the output.

    Returns:
        None.
    """
    # NOTE(review): the leading part of the search URL is empty in this copy
    # and the value is not loaded anywhere in the code shown.
    url = '' \
          '&optionType=City&display=Beijing&crn=1&adult=2&children=0&searchBoxArg=t&travelPurpose=0&ctm_ref=ix_sb_dl' \
          '&domestic=1 '

    # append each hotel to the list
    # find_elements(by, value) is used because the find_elements_by_* helpers
    # were removed in Selenium 4; 'class name' is the class-name locator
    # strategy string.
    window = list(driver.find_elements('class name', 'list-card-title'))

    # append the urls of each page to the list
    for _hotel in window:
        # NOTE(review): nothing here switches the browser to the hotel's
        # page, so every iteration currently re-reads the same page source.
        html = driver.page_source
        soup = BeautifulSoup(html, 'lxml')
        # The parsed page was previously discarded; pass it to the two
        # parser functions, which nothing else in the file calls.
        get_hotel_name(soup)
        get_room_details(soup)
if __name__ == '__main__':
    # NOTE(review): the body of this guard is missing from this copy.
    # get_data() is the only zero-argument entry point defined in the file,
    # so it is assumed to be the intended call - confirm.
    try:
        get_data()
    finally:
        # Flush the report and shut the browser down even if scraping fails.
        file1.close()
        driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment