Last active
August 3, 2019 20:22
-
-
Save Isan-Rivkin/0a3f7568a7e82a6d02d1aa3c0b74922d to your computer and use it in GitHub Desktop.
Hotels.com availability crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import logging
import logging.config
import os
import sys
import time

import pypd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
# Site that is crawled and the local chromedriver binary used to drive Chrome.
URL = "https://www.hotels.com"
DRIVER = "/home/isan/Desktop/random/bookingzanzi/test1/chromedriver2"

# check_hotel() judges this hotel by its displayed price rather than by the
# "no availability" headline used for every other hotel.
GOLD_HOTEL = 'Gold Zanzibar Beach House & Spa, Kendwa, Tanzania'

# Options object handed to every Chrome session; no options are set on it here.
chrome_options = Options()

# Client-level API key of the pypd library, left blank in this script.
pypd.api_key = ""
def check_hotel(name, in_date, out_date):
    """Search hotels.com for one hotel and report whether it looks bookable.

    A fresh Chrome session is opened for every call, the search form is filled
    with the hotel name and the stay dates, and the result page is inspected.

    Args:
        name: Hotel name exactly as it should be typed into the search box.
        in_date: Check-in date string typed into the check-in field.
        out_date: Check-out date string typed into the check-out field.

    Returns:
        A ``(message, is_not_available)`` tuple.  For ``GOLD_HOTEL`` the
        message describes the displayed price against the 7500 limit and the
        flag is True when the price exceeds that limit.  For every other hotel
        the message is the first listing headline (or ``'AVAILABLE'`` when that
        headline cannot be read) and the flag is True when the headline
        contains "no availability".

    Raises:
        Exception: any Selenium error raised while filling in the form, and
            ``ValueError`` when the Gold hotel price cannot be parsed.
    """
    driver = webdriver.Chrome(executable_path=DRIVER, chrome_options=chrome_options)
    # The browser must be shut down on every path; previously a failed element
    # lookup skipped driver.quit() and leaked a Chrome process per call.
    try:
        driver.get(URL)

        # enter hotel name to search box
        search_input = driver.find_element_by_xpath('//*[@id="qf-0q-destination"]')
        search_input.send_keys(name)
        time.sleep(1)

        # check in
        chkin_input = driver.find_element_by_xpath('//*[@id="qf-0q-localised-check-in"]')
        chkin_input.clear()
        chkin_input.send_keys(in_date)
        time.sleep(0.5)

        # check out
        chkout_input = driver.find_element_by_xpath('//*[@id="qf-0q-localised-check-out"]')
        chkout_input.clear()
        chkout_input.send_keys(out_date)
        time.sleep(1)

        # click another part of the form before submitting
        mid = '//*[@id="hds-marquee"]/div[3]/div[1]/div/form/div[4]/div'
        ActionChains(driver).click(driver.find_element_by_xpath(mid)).perform()

        # click on the search button and give the result page time to load
        search_button = '//*[@id="hds-marquee"]/div[3]/div[1]/div/form/div[5]/button'
        ActionChains(driver).click(driver.find_element_by_xpath(search_button)).perform()
        time.sleep(3)

        if name != GOLD_HOTEL:
            # check if sold out: read the headline of the first listing
            sold_out = '//*[@id="listings"]/ol/li[1]/article/a/h2'
            try:
                result = str(driver.find_element_by_xpath(sold_out).text)
                return result, "no availability" in result
            except Exception:
                # Headline could not be read: reported as available, exactly
                # as before, but without trapping KeyboardInterrupt/SystemExit.
                return 'AVAILABLE', False

        price = '//*[@id="book-info-container"]/div[1]/div[1]/div[1]/div/span'
        # Drop the leading character (presumably the currency symbol) and the
        # thousands separators before converting to an integer.
        price_str = str(driver.find_element_by_xpath(price).text)[1:].replace(',', '')
        price_num = int(price_str)
        ok_limit_price = 7500
        is_not_available = price_num > ok_limit_price
        return 'price :{} > {}'.format(price_str, ok_limit_price), is_not_available
    finally:
        driver.quit()
def check_hotels(hotels, chkin_date, chkout_date):
    """Run check_hotel() for every hotel and collect the outcomes.

    Args:
        hotels: Iterable of hotel names.
        chkin_date: Check-in date string forwarded to check_hotel().
        chkout_date: Check-out date string forwarded to check_hotel().

    Returns:
        Dict keyed by hotel name.  Each value holds ``'available'`` (bool) and
        ``'paylod'`` (message); hotels whose check raised also carry
        ``'error': True`` and are reported as not available.
    """
    final_result = {}
    for hotel in hotels:
        try:
            result, is_not_available = check_hotel(hotel, chkin_date, chkout_date)
        except Exception:
            # Catch Exception rather than everything so Ctrl-C and SystemExit
            # still stop the program, and keep the traceback in the log.
            logging.getLogger('hotels').exception('check failed for %s', hotel)
            final_result[hotel] = {
                'available': False,
                'paylod': 'some error occured!',
                'error': True,
            }
        else:
            final_result[hotel] = {
                'available': not is_not_available,
                'paylod': result,
            }
    return final_result
def alert(paylod):
    """Send a "trigger" event through pypd's EventV2 API.

    Args:
        paylod: Any object; its ``str()`` form becomes the event summary.

    The routing key is read from the ``PAGERDUTY_ROUTING_KEY`` environment
    variable when it is set, and falls back to the key that was previously
    hard-coded so existing deployments keep working.
    """
    # NOTE(security): the fallback below is a credential committed to source.
    # It should be rotated and then supplied only through the environment.
    routing_key = os.environ.get(
        'PAGERDUTY_ROUTING_KEY', '08dc525e79e346dfaf69dc27dfa82b48'
    )
    pypd.EventV2.create(data={
        'routing_key': routing_key,
        'event_action': 'trigger',
        'payload': {
            'summary': str(paylod),
            'severity': 'error',
            'source': 'pypd bot',
        },
    })
def init_logger(fname):
    """Return the 'hotels' logger, configured to write to the file *fname*.

    The logger and its file handler both accept DEBUG and above.  Calling this
    function again with the same file does not attach a second handler, so
    records are never written twice.

    Args:
        fname: Path of the log file.

    Returns:
        The configured ``logging.Logger`` named ``'hotels'``.
    """
    logger = logging.getLogger('hotels')
    logger.setLevel(logging.DEBUG)

    # FileHandler stores the absolute path in baseFilename, so compare on that.
    log_path = os.path.abspath(fname)
    already_attached = any(
        isinstance(handler, logging.FileHandler)
        and handler.baseFilename == log_path
        for handler in logger.handlers
    )
    if not already_attached:
        formatter = logging.Formatter(
            fmt='%(asctime)s %(module)s,line: %(lineno)d %(levelname)8s | %(message)s',
            datefmt='%Y/%m/%d %H:%M:%S',  # %I:%M:%S %p AM|PM format
        )
        # create file handler which logs even debug messages
        fh = logging.FileHandler(fname)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    logger.info("Logger initiated!")
    return logger
def should_alert(result):
    """Tell whether at least one hotel in *result* is marked available.

    Args:
        result: Dict shaped like the return value of check_hotels(): hotel
            name mapped to a dict that has an ``'available'`` entry.

    Returns:
        True when any entry's ``'available'`` value is truthy, else False
        (including for an empty dict).
    """
    return any(info['available'] for info in result.values())
def run_process(hotels, chck_in_date, chck_out_date, delay_secs, logger):
    """Check the hotels forever, alerting whenever one becomes available.

    Each round runs check_hotels(), logs the outcome, sends an alert when
    should_alert() says so, and then sleeps.  The function never returns.

    Args:
        hotels: Iterable of hotel names to check every round.
        chck_in_date: Check-in date string forwarded to check_hotels().
        chck_out_date: Check-out date string forwarded to check_hotels().
        delay_secs: Seconds to sleep between rounds.
        logger: Logger used for progress and error messages.
    """
    while True:
        logger.info('Starting round...')
        result = check_hotels(hotels, chck_in_date, chck_out_date)
        logger.info(str(result))
        # check if should alert
        if should_alert(result):
            logger.info('Alert!!!!!!!')
            try:
                alert(result)
            except Exception:
                # A failed delivery must not stop the monitor; the next round
                # will try again.
                logger.exception('Failed to send alert')
        logger.info('End round...')
        time.sleep(delay_secs)
# Stay dates, passed unchanged to the site's check-in / check-out fields.
chck_in_date = '04/10/2019'
chck_out_date = '07/10/2019'

# Hotel names as they are typed into the search box.
h1 = 'Riu Palace Zanzibar, Nungwi, Zanzibar, Tanzania'
h2 = 'Zuri Zanzibar, Kendwa, Tanzania'
# Noted as roughly 10k, i.e. too expensive; check_hotel() compares its price
# against a limit instead of reading a sold-out headline.
h3 = 'Gold Zanzibar Beach House & Spa, Kendwa, Tanzania'
# Currently excluded: 'Ocean Paradise Resort & Spa Zanzibar, Kiwengwa, Tanzania'
h5 = "Hotel La Gemma dell'Est - All Inclusive, Nungwi, Tanzania"
# Extra name kept around for manual test runs; not part of the monitored list.
test = 'the z hotel, Nungwi, Tanzania'

# Seconds to wait between monitoring rounds.
delay = 100
logger = init_logger('hotels.log')

# Start the endless monitoring loop (this call does not return).
run_process([h1, h2, h3, h5], chck_in_date, chck_out_date, delay, logger)
import json | |
import sys | |
import logging | |
import logging.config | |
# Crawl target and the chromedriver executable used to launch Chrome.
URL = "https://www.hotels.com"
DRIVER = "/home/isan/Desktop/random/bookingzanzi/test1/chromedriver2"

# The one hotel that check_hotel() evaluates by price instead of by the
# "no availability" headline.
GOLD_HOTEL = 'Gold Zanzibar Beach House & Spa, Kendwa, Tanzania'

# Shared Chrome options instance; nothing is configured on it in this script.
chrome_options = Options()

# pypd client API key, deliberately empty here.
pypd.api_key = ""
def check_hotel(name, in_date, out_date):
    """Look one hotel up on hotels.com and decide whether it can be booked.

    Every call starts its own Chrome session, fills the search form with the
    hotel name and the two dates, submits it, and reads the result page.

    Args:
        name: Hotel name typed verbatim into the destination box.
        in_date: Check-in date string typed into the check-in field.
        out_date: Check-out date string typed into the check-out field.

    Returns:
        ``(message, is_not_available)``.  For ``GOLD_HOTEL`` the flag is True
        when the displayed price is above 7500 and the message reports that
        price.  For other hotels the message is the first listing's headline,
        or ``'AVAILABLE'`` if it cannot be read, and the flag is True when the
        headline contains "no availability".

    Raises:
        Exception: Selenium errors from the form steps, or ``ValueError`` if
            the Gold hotel price text is not a number.
    """
    driver = webdriver.Chrome(executable_path=DRIVER, chrome_options=chrome_options)
    # try/finally guarantees the browser is closed even when a lookup fails;
    # without it each failed round left a Chrome process behind.
    try:
        driver.get(URL)

        # enter hotel name to search box
        search_input = driver.find_element_by_xpath('//*[@id="qf-0q-destination"]')
        search_input.send_keys(name)
        time.sleep(1)

        # check in
        chkin_input = driver.find_element_by_xpath('//*[@id="qf-0q-localised-check-in"]')
        chkin_input.clear()
        chkin_input.send_keys(in_date)
        time.sleep(0.5)

        # check out
        chkout_input = driver.find_element_by_xpath('//*[@id="qf-0q-localised-check-out"]')
        chkout_input.clear()
        chkout_input.send_keys(out_date)
        time.sleep(1)

        # click another form element first, then the search button
        mid = '//*[@id="hds-marquee"]/div[3]/div[1]/div/form/div[4]/div'
        ActionChains(driver).click(driver.find_element_by_xpath(mid)).perform()

        search_button = '//*[@id="hds-marquee"]/div[3]/div[1]/div/form/div[5]/button'
        ActionChains(driver).click(driver.find_element_by_xpath(search_button)).perform()
        # wait for the result page
        time.sleep(3)

        if name == GOLD_HOTEL:
            price = '//*[@id="book-info-container"]/div[1]/div[1]/div[1]/div/span'
            # Skip the first character (presumably a currency symbol) and strip
            # thousands separators so the text converts to an int.
            price_str = str(driver.find_element_by_xpath(price).text)[1:].replace(',', '')
            price_num = int(price_str)
            ok_limit_price = 7500
            is_not_available = price_num > ok_limit_price
            return 'price :{} > {}'.format(price_str, ok_limit_price), is_not_available

        # check if sold out: inspect the first listing's headline
        sold_out = '//*[@id="listings"]/ol/li[1]/article/a/h2'
        try:
            result = str(driver.find_element_by_xpath(sold_out).text)
            return result, "no availability" in result
        except Exception:
            # Unreadable headline is still reported as available, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return 'AVAILABLE', False
    finally:
        driver.quit()
def check_hotels(hotels, chkin_date, chkout_date):
    """Check each hotel in turn and gather the per-hotel outcome.

    Args:
        hotels: Iterable of hotel names.
        chkin_date: Check-in date string passed on to check_hotel().
        chkout_date: Check-out date string passed on to check_hotel().

    Returns:
        Dict mapping hotel name to ``{'available': bool, 'paylod': message}``.
        When a check raises, the hotel is marked not available and the entry
        additionally has ``'error': True``.
    """
    final_result = {}
    for hotel in hotels:
        try:
            result, is_not_available = check_hotel(hotel, chkin_date, chkout_date)
        except Exception:
            # Exception (not a bare except) lets Ctrl-C / SystemExit through;
            # the traceback goes to the 'hotels' log instead of being lost.
            logging.getLogger('hotels').exception('check failed for %s', hotel)
            entry = {
                'available': False,
                'paylod': 'some error occured!',
                'error': True,
            }
        else:
            entry = {'available': not is_not_available, 'paylod': result}
        final_result[hotel] = entry
    return final_result
def alert(paylod):
    """Send a "trigger" event through pypd's EventV2 API.

    Args:
        paylod: Any object; ``str(paylod)`` is used as the event summary.

    The routing key comes from the ``PAGERDUTY_ROUTING_KEY`` environment
    variable.  When the variable is unset the previous behaviour (an empty
    routing key) is kept.
    """
    # The key was blank in source, so it has to be supplied from outside.
    routing_key = os.environ.get('PAGERDUTY_ROUTING_KEY', '')
    pypd.EventV2.create(data={
        'routing_key': routing_key,
        'event_action': 'trigger',
        'payload': {
            'summary': str(paylod),
            'severity': 'error',
            'source': 'pypd bot',
        },
    })
def init_logger(fname):
    """Configure and return the 'hotels' logger backed by the file *fname*.

    Logger and handler levels are both DEBUG.  A handler for the same file is
    attached only once, so calling this function repeatedly does not multiply
    the log output.

    Args:
        fname: Path of the log file.

    Returns:
        The ``logging.Logger`` named ``'hotels'``.
    """
    logger = logging.getLogger('hotels')
    logger.setLevel(logging.DEBUG)

    # baseFilename on a FileHandler is the absolute path of its file.
    wanted = os.path.abspath(fname)
    has_handler = False
    for existing in logger.handlers:
        if isinstance(existing, logging.FileHandler) and existing.baseFilename == wanted:
            has_handler = True
            break

    if not has_handler:
        # create file handler which logs even debug messages
        fh = logging.FileHandler(fname)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(logging.Formatter(
            fmt='%(asctime)s %(module)s,line: %(lineno)d %(levelname)8s | %(message)s',
            datefmt='%Y/%m/%d %H:%M:%S',  # %I:%M:%S %p AM|PM format
        ))
        logger.addHandler(fh)

    logger.info("Logger initiated!")
    return logger
def should_alert(result):
    """Return True when any hotel entry in *result* is available.

    Args:
        result: Mapping of hotel name to a dict with an ``'available'`` entry,
            as produced by check_hotels().

    Returns:
        True if at least one ``'available'`` value is truthy; False otherwise,
        including when *result* is empty.
    """
    return any(entry['available'] for entry in result.values())
def run_process(hotels, chck_in_date, chck_out_date, delay_secs, logger):
    """Monitor the hotels in an endless loop and alert on availability.

    One round = check_hotels(), log the result, alert if should_alert() is
    true, sleep.  The loop has no exit condition, so this never returns.

    Args:
        hotels: Iterable of hotel names checked in every round.
        chck_in_date: Check-in date string passed to check_hotels().
        chck_out_date: Check-out date string passed to check_hotels().
        delay_secs: Pause between rounds, in seconds.
        logger: Logger receiving progress and error messages.
    """
    while True:
        logger.info('Starting round...')
        result = check_hotels(hotels, chck_in_date, chck_out_date)
        logger.info(str(result))
        # check if should alert
        if should_alert(result):
            logger.info('Alert!!!!!!!')
            try:
                alert(result)
            except Exception:
                # Keep monitoring even if the alert could not be delivered.
                logger.exception('Failed to send alert')
        logger.info('End round...')
        time.sleep(delay_secs)
# Dates of the stay, forwarded as-is to the search form.
chck_in_date = '04/10/2019'
chck_out_date = '07/10/2019'

# Hotels to watch, written the way the search box expects them.
h1 = 'Riu Palace Zanzibar, Nungwi, Zanzibar, Tanzania'
h2 = 'Zuri Zanzibar, Kendwa, Tanzania'
# Noted as about 10k and too expensive; handled by the price check in
# check_hotel().
h3 = 'Gold Zanzibar Beach House & Spa, Kendwa, Tanzania'
# Not monitored at the moment: 'Ocean Paradise Resort & Spa Zanzibar, Kiwengwa, Tanzania'
h5 = "Hotel La Gemma dell'Est - All Inclusive, Nungwi, Tanzania"
# Spare name for manual experiments; not in the monitored list.
test = 'the z hotel, Nungwi, Tanzania'

# Pause between rounds, in seconds.
delay = 100
logger = init_logger('hotels.log')

# Enter the monitoring loop; it runs until the process is stopped.
run_process([h1, h2, h3, h5], chck_in_date, chck_out_date, delay, logger)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment