Skip to content

Instantly share code, notes, and snippets.

@Isan-Rivkin
Last active August 3, 2019 20:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Isan-Rivkin/0a3f7568a7e82a6d02d1aa3c0b74922d to your computer and use it in GitHub Desktop.
Save Isan-Rivkin/0a3f7568a7e82a6d02d1aa3c0b74922d to your computer and use it in GitHub Desktop.
Hotels.com availability crawler
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
from bs4 import BeautifulSoup
import pypdfrom selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
from bs4 import BeautifulSoup
import pypd
import json
import sys
import logging
import logging.config
# pypd API key; blank in this file.
pypd.api_key = ""
# Default ChromeOptions instance; no flags are set on it in this file.
chrome_options = Options()
# Page every search starts from.
URL = "https://www.hotels.com"
# Path to the chromedriver binary handed to webdriver.Chrome.
# NOTE(review): absolute, machine-specific path — adjust per host.
DRIVER = "/home/isan/Desktop/random/bookingzanzi/test1/chromedriver2"
# Hotel name that check_hotel compares against to choose the price-based check
# instead of the sold-out-heading check.
GOLD_HOTEL = 'Gold Zanzibar Beach House & Spa, Kendwa, Tanzania'
def check_hotel(name, in_date, out_date):
    """Run one hotels.com search for a hotel and report whether it looks bookable.

    A fresh Chrome session is started for every call and is always shut down
    before returning, including when a lookup fails part-way through.

    Args:
        name: Text typed into the destination search box.
        in_date: Check-in date string typed verbatim into the check-in field.
        out_date: Check-out date string typed verbatim into the check-out field.

    Returns:
        A ``(text, is_not_available)`` tuple.
        For every hotel except ``GOLD_HOTEL``: ``text`` is the first listing's
        heading text (or ``'AVAILABLE'`` when that heading cannot be read) and
        the flag is True when the heading contains ``"no availability"``.
        For ``GOLD_HOTEL``: ``text`` is a ``'price :<price> > <limit>'`` string
        and the flag is True when the parsed price exceeds the limit.

    Raises:
        Any Selenium error from filling or submitting the search form, and
        ``ValueError`` when the price text cannot be parsed as an integer.
    """
    driver = webdriver.Chrome(executable_path=DRIVER, chrome_options=chrome_options)
    try:
        driver.get(URL)

        # enter hotel name to search box
        search_input = driver.find_element_by_xpath('//*[@id="qf-0q-destination"]')
        search_input.send_keys(name)
        time.sleep(1)

        # check in
        chkin_input = driver.find_element_by_xpath('//*[@id="qf-0q-localised-check-in"]')
        chkin_input.clear()
        chkin_input.send_keys(in_date)
        time.sleep(0.5)

        # check out
        chkout_input = driver.find_element_by_xpath('//*[@id="qf-0q-localised-check-out"]')
        chkout_input.clear()
        chkout_input.send_keys(out_date)
        time.sleep(1)

        # mid click: click an element inside the form before pressing search.
        # NOTE(review): presumably dismisses an open date picker / suggestion
        # popup so the button is clickable — confirm.
        mid = '//*[@id="hds-marquee"]/div[3]/div[1]/div/form/div[4]/div'
        ActionChains(driver).click(driver.find_element_by_xpath(mid)).perform()

        # click on button
        search_button = '//*[@id="hds-marquee"]/div[3]/div[1]/div/form/div[5]/button'
        ActionChains(driver).click(driver.find_element_by_xpath(search_button)).perform()
        time.sleep(3)

        if name != GOLD_HOTEL:
            # check if sold out
            sold_out = '//*[@id="listings"]/ol/li[1]/article/a/h2'
            contains_no_availability = False
            try:
                result = str(driver.find_element_by_xpath(sold_out).text)
                contains_no_availability = "no availability" in result
            except Exception:
                # Heading could not be read: treated as "available".
                # Exception (not a bare except) so Ctrl-C still stops the script.
                result = 'AVAILABLE'
            return result, contains_no_availability

        price = '//*[@id="book-info-container"]/div[1]/div[1]/div[1]/div/span'
        price_txt = driver.find_element_by_xpath(price)
        # Drop the first character (presumably a currency symbol) and any
        # thousands separators before converting.
        price_str = str(price_txt.text)[1:].replace(',', '')
        price_num = int(price_str)
        ok_limit_price = 7500
        is_not_available = price_num > ok_limit_price
        return 'price :{} > {}'.format(price_str, ok_limit_price), is_not_available
    finally:
        # Always release the browser; otherwise every failed round leaves a
        # Chrome/chromedriver process behind.
        driver.quit()
def check_hotels(hotels, chkin_date, chkout_date):
    """Check every hotel in turn and collect the outcomes in one dict.

    Args:
        hotels: Iterable of hotel search strings passed to ``check_hotel``.
        chkin_date: Check-in date string forwarded to ``check_hotel``.
        chkout_date: Check-out date string forwarded to ``check_hotel``.

    Returns:
        Dict keyed by hotel string. Each value holds ``'available'`` (bool)
        and ``'paylod'`` (text from ``check_hotel``); when the check raised,
        ``'available'`` is False, ``'paylod'`` is a fixed error message and
        ``'error'`` is True.
    """
    final_result = {}
    for hotel in hotels:
        try:
            result, is_not_available = check_hotel(hotel, chkin_date, chkout_date)
        except Exception:
            # One failing hotel must not abort the round, but keep the
            # traceback. Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit through.
            logging.getLogger('hotels').exception('Check failed for %s', hotel)
            final_result[hotel] = {
                'available': False,
                'paylod': 'some error occured!',
                'error': True,
            }
        else:
            final_result[hotel] = {
                'available': not is_not_available,
                'paylod': result,
            }
    return final_result
def alert(paylod, routing_key=None):
    """Create a 'trigger' event of severity 'error' through ``pypd.EventV2``.

    Args:
        paylod: Object to report; ``str(paylod)`` becomes the event summary.
        routing_key: Optional routing key for the event. When omitted, the
            ``PAGERDUTY_ROUTING_KEY`` environment variable is used, and only
            if that is unset too, the key that used to be hard-coded here.
    """
    import os  # function-scope so the file's import block need not change

    # NOTE(review): a routing key is committed in source. It is kept solely as
    # a last-resort fallback so existing callers keep working; rotate it and
    # supply the key through the parameter or the environment instead.
    key = (
        routing_key
        or os.environ.get('PAGERDUTY_ROUTING_KEY')
        or '08dc525e79e346dfaf69dc27dfa82b48'
    )
    pypd.EventV2.create(data={
        'routing_key': key,
        'event_action': 'trigger',
        'payload': {
            'summary': str(paylod),
            'severity': 'error',
            'source': 'pypd bot',
        },
    })
def init_logger(fname):
    """Return the shared 'hotels' logger, writing DEBUG and above to ``fname``.

    Calling this more than once with the same file does not attach a second
    handler, so records are never written twice.

    Args:
        fname: Path of the log file.

    Returns:
        The ``logging.Logger`` named ``'hotels'``.
    """
    import os  # function-scope so the file's import block need not change

    formatter = logging.Formatter(
        fmt='%(asctime)s %(module)s,line: %(lineno)d %(levelname)8s | %(message)s',
        datefmt='%Y/%m/%d %H:%M:%S',  # %I:%M:%S %p AM|PM format
    )
    logger = logging.getLogger('hotels')
    logger.setLevel(logging.DEBUG)

    # logging.getLogger returns the same object on every call, so only add a
    # file handler if this file is not already attached.
    target = os.path.abspath(os.fspath(fname))
    already_attached = any(
        isinstance(handler, logging.FileHandler) and handler.baseFilename == target
        for handler in logger.handlers
    )
    if not already_attached:
        # create file handler which logs even debug messages
        fh = logging.FileHandler(fname)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    logger.info("Logger initiated!")
    return logger
def should_alert(result):
    """Tell whether at least one hotel in ``result`` is marked available.

    Args:
        result: Dict keyed by hotel, each value a dict with an ``'available'``
            entry (the shape built by ``check_hotels``).

    Returns:
        True if any entry's ``'available'`` value is truthy, otherwise False
        (including for an empty dict).
    """
    return any(info['available'] for info in result.values())
def run_process(hotels, chck_in_date, chck_out_date, delay_secs, logger):
    """Poll the hotels forever, alerting after each round that finds availability.

    Args:
        hotels: Hotel search strings to check each round.
        chck_in_date: Check-in date string forwarded to ``check_hotels``.
        chck_out_date: Check-out date string forwarded to ``check_hotels``.
        delay_secs: Seconds to sleep between rounds.
        logger: Logger that receives the per-round progress messages.

    This function does not return; it loops until an exception escapes or the
    process is interrupted.
    """
    while True:
        logger.info('Starting round...')
        result = check_hotels(hotels, chck_in_date, chck_out_date)
        logger.info(str(result))
        # check if should alert
        if should_alert(result):
            logger.info('Alert!!!!!!!')
            try:
                alert(result)
            except Exception:
                # A failed delivery must not stop the watcher; record it and
                # try again next round.
                logger.exception('Failed to send alert')
        logger.info('End round...')
        time.sleep(delay_secs)
# Stay dates, typed verbatim into the hotels.com date fields.
# NOTE(review): presumably dd/mm/yyyy — confirm against the site's locale.
chck_in_date = '04/10/2019'
chck_out_date = '07/10/2019'
# Destination strings typed into the hotels.com search box.
h1 = 'Riu Palace Zanzibar, Nungwi, Zanzibar, Tanzania'
h2 = 'Zuri Zanzibar, Kendwa, Tanzania'
# this is 10k too expensive
# (same string as GOLD_HOTEL, so check_hotel judges it by price)
h3 = 'Gold Zanzibar Beach House & Spa, Kendwa, Tanzania'
#h4 = 'Ocean Paradise Resort & Spa Zanzibar, Kiwengwa, Tanzania'
h5 = "Hotel La Gemma dell'Est - All Inclusive, Nungwi, Tanzania"
# Not in the monitored list below; only used by the commented-out call.
test = 'the z hotel, Nungwi, Tanzania'
# r = check_hotels([h1,h2,h3,test], chck_in_date, chck_out_date)
logger = init_logger('hotels.log')
# Seconds to sleep between polling rounds.
delay = 100
# Starts the polling loop; run_process contains a `while True` with no exit,
# so nothing after this call runs unless an exception escapes it.
run_process([h1,h2,h3,h5], chck_in_date,chck_out_date,delay,logger)
# run_process([h1], chck_in_date,chck_out_date,delay,logger)
# logger = init_logger('hotels.log')
# logger.info(str(r))
# print(r)
# NOTE(review): the string below looks like a token/credential; if it is one,
# remove it from source and rotate it.
# SxvH-jYmuRHQ3qx5sHES
import json
import sys
import logging
import logging.config
# pypd API key; blank in this file.
pypd.api_key = ""
# Default ChromeOptions instance; no flags are set on it in this file.
chrome_options = Options()
# Page every search starts from.
URL = "https://www.hotels.com"
# Path to the chromedriver binary handed to webdriver.Chrome.
# NOTE(review): absolute, machine-specific path — adjust per host.
DRIVER = "/home/isan/Desktop/random/bookingzanzi/test1/chromedriver2"
# Hotel name that check_hotel compares against to choose the price-based check
# instead of the sold-out-heading check.
GOLD_HOTEL = 'Gold Zanzibar Beach House & Spa, Kendwa, Tanzania'
def check_hotel(name, in_date, out_date):
    """Run one hotels.com search for a hotel and report whether it looks bookable.

    A fresh Chrome session is started for every call and is always shut down
    before returning, including when a lookup fails part-way through.

    Args:
        name: Text typed into the destination search box.
        in_date: Check-in date string typed verbatim into the check-in field.
        out_date: Check-out date string typed verbatim into the check-out field.

    Returns:
        A ``(text, is_not_available)`` tuple.
        For every hotel except ``GOLD_HOTEL``: ``text`` is the first listing's
        heading text (or ``'AVAILABLE'`` when that heading cannot be read) and
        the flag is True when the heading contains ``"no availability"``.
        For ``GOLD_HOTEL``: ``text`` is a ``'price :<price> > <limit>'`` string
        and the flag is True when the parsed price exceeds the limit.

    Raises:
        Any Selenium error from filling or submitting the search form, and
        ``ValueError`` when the price text cannot be parsed as an integer.
    """
    driver = webdriver.Chrome(executable_path=DRIVER, chrome_options=chrome_options)
    try:
        driver.get(URL)

        # enter hotel name to search box
        search_input = driver.find_element_by_xpath('//*[@id="qf-0q-destination"]')
        search_input.send_keys(name)
        time.sleep(1)

        # check in
        chkin_input = driver.find_element_by_xpath('//*[@id="qf-0q-localised-check-in"]')
        chkin_input.clear()
        chkin_input.send_keys(in_date)
        time.sleep(0.5)

        # check out
        chkout_input = driver.find_element_by_xpath('//*[@id="qf-0q-localised-check-out"]')
        chkout_input.clear()
        chkout_input.send_keys(out_date)
        time.sleep(1)

        # mid click: click an element inside the form before pressing search.
        # NOTE(review): presumably dismisses an open date picker / suggestion
        # popup so the button is clickable — confirm.
        mid = '//*[@id="hds-marquee"]/div[3]/div[1]/div/form/div[4]/div'
        ActionChains(driver).click(driver.find_element_by_xpath(mid)).perform()

        # click on button
        search_button = '//*[@id="hds-marquee"]/div[3]/div[1]/div/form/div[5]/button'
        ActionChains(driver).click(driver.find_element_by_xpath(search_button)).perform()
        time.sleep(3)

        if name != GOLD_HOTEL:
            # check if sold out
            sold_out = '//*[@id="listings"]/ol/li[1]/article/a/h2'
            contains_no_availability = False
            try:
                result = str(driver.find_element_by_xpath(sold_out).text)
                contains_no_availability = "no availability" in result
            except Exception:
                # Heading could not be read: treated as "available".
                # Exception (not a bare except) so Ctrl-C still stops the script.
                result = 'AVAILABLE'
            return result, contains_no_availability

        price = '//*[@id="book-info-container"]/div[1]/div[1]/div[1]/div/span'
        price_txt = driver.find_element_by_xpath(price)
        # Drop the first character (presumably a currency symbol) and any
        # thousands separators before converting.
        price_str = str(price_txt.text)[1:].replace(',', '')
        price_num = int(price_str)
        ok_limit_price = 7500
        is_not_available = price_num > ok_limit_price
        return 'price :{} > {}'.format(price_str, ok_limit_price), is_not_available
    finally:
        # Always release the browser; otherwise every failed round leaves a
        # Chrome/chromedriver process behind.
        driver.quit()
def check_hotels(hotels, chkin_date, chkout_date):
    """Check every hotel in turn and collect the outcomes in one dict.

    Args:
        hotels: Iterable of hotel search strings passed to ``check_hotel``.
        chkin_date: Check-in date string forwarded to ``check_hotel``.
        chkout_date: Check-out date string forwarded to ``check_hotel``.

    Returns:
        Dict keyed by hotel string. Each value holds ``'available'`` (bool)
        and ``'paylod'`` (text from ``check_hotel``); when the check raised,
        ``'available'`` is False, ``'paylod'`` is a fixed error message and
        ``'error'`` is True.
    """
    final_result = {}
    for hotel in hotels:
        try:
            result, is_not_available = check_hotel(hotel, chkin_date, chkout_date)
        except Exception:
            # One failing hotel must not abort the round, but keep the
            # traceback. Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit through.
            logging.getLogger('hotels').exception('Check failed for %s', hotel)
            final_result[hotel] = {
                'available': False,
                'paylod': 'some error occured!',
                'error': True,
            }
        else:
            final_result[hotel] = {
                'available': not is_not_available,
                'paylod': result,
            }
    return final_result
def alert(paylod, routing_key=None):
    """Create a 'trigger' event of severity 'error' through ``pypd.EventV2``.

    Args:
        paylod: Object to report; ``str(paylod)`` becomes the event summary.
        routing_key: Optional routing key for the event. When omitted, the
            ``PAGERDUTY_ROUTING_KEY`` environment variable is used; if that is
            unset too, the key is the empty string, as it was hard-coded before.
    """
    import os  # function-scope so the file's import block need not change

    # The key used to be a hard-coded empty string with no way to supply one.
    key = routing_key or os.environ.get('PAGERDUTY_ROUTING_KEY') or ''
    pypd.EventV2.create(data={
        'routing_key': key,
        'event_action': 'trigger',
        'payload': {
            'summary': str(paylod),
            'severity': 'error',
            'source': 'pypd bot',
        },
    })
def init_logger(fname):
    """Return the shared 'hotels' logger, writing DEBUG and above to ``fname``.

    Calling this more than once with the same file does not attach a second
    handler, so records are never written twice.

    Args:
        fname: Path of the log file.

    Returns:
        The ``logging.Logger`` named ``'hotels'``.
    """
    import os  # function-scope so the file's import block need not change

    formatter = logging.Formatter(
        fmt='%(asctime)s %(module)s,line: %(lineno)d %(levelname)8s | %(message)s',
        datefmt='%Y/%m/%d %H:%M:%S',  # %I:%M:%S %p AM|PM format
    )
    logger = logging.getLogger('hotels')
    logger.setLevel(logging.DEBUG)

    # logging.getLogger returns the same object on every call, so only add a
    # file handler if this file is not already attached.
    target = os.path.abspath(os.fspath(fname))
    already_attached = any(
        isinstance(handler, logging.FileHandler) and handler.baseFilename == target
        for handler in logger.handlers
    )
    if not already_attached:
        # create file handler which logs even debug messages
        fh = logging.FileHandler(fname)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    logger.info("Logger initiated!")
    return logger
def should_alert(result):
    """Tell whether at least one hotel in ``result`` is marked available.

    Args:
        result: Dict keyed by hotel, each value a dict with an ``'available'``
            entry (the shape built by ``check_hotels``).

    Returns:
        True if any entry's ``'available'`` value is truthy, otherwise False
        (including for an empty dict).
    """
    return any(info['available'] for info in result.values())
def run_process(hotels, chck_in_date, chck_out_date, delay_secs, logger):
    """Poll the hotels forever, alerting after each round that finds availability.

    Args:
        hotels: Hotel search strings to check each round.
        chck_in_date: Check-in date string forwarded to ``check_hotels``.
        chck_out_date: Check-out date string forwarded to ``check_hotels``.
        delay_secs: Seconds to sleep between rounds.
        logger: Logger that receives the per-round progress messages.

    This function does not return; it loops until an exception escapes or the
    process is interrupted.
    """
    while True:
        logger.info('Starting round...')
        result = check_hotels(hotels, chck_in_date, chck_out_date)
        logger.info(str(result))
        # check if should alert
        if should_alert(result):
            logger.info('Alert!!!!!!!')
            try:
                alert(result)
            except Exception:
                # A failed delivery must not stop the watcher; record it and
                # try again next round.
                logger.exception('Failed to send alert')
        logger.info('End round...')
        time.sleep(delay_secs)
# Stay dates, typed verbatim into the hotels.com date fields.
# NOTE(review): presumably dd/mm/yyyy — confirm against the site's locale.
chck_in_date = '04/10/2019'
chck_out_date = '07/10/2019'
# Destination strings typed into the hotels.com search box.
h1 = 'Riu Palace Zanzibar, Nungwi, Zanzibar, Tanzania'
h2 = 'Zuri Zanzibar, Kendwa, Tanzania'
# this is 10k too expensive
# (same string as GOLD_HOTEL, so check_hotel judges it by price)
h3 = 'Gold Zanzibar Beach House & Spa, Kendwa, Tanzania'
#h4 = 'Ocean Paradise Resort & Spa Zanzibar, Kiwengwa, Tanzania'
h5 = "Hotel La Gemma dell'Est - All Inclusive, Nungwi, Tanzania"
# Not in the monitored list passed to run_process below.
test = 'the z hotel, Nungwi, Tanzania'
logger = init_logger('hotels.log')
# Seconds to sleep between polling rounds.
delay = 100
# Starts the polling loop; run_process contains a `while True` with no exit,
# so nothing after this call runs unless an exception escapes it.
run_process([h1,h2,h3,h5], chck_in_date,chck_out_date,delay,logger)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment