Skip to content

Instantly share code, notes, and snippets.

View SivaArwin's full-sized avatar
🎯
Focusing

Siva Arwin SivaArwin

🎯
Focusing
View GitHub Profile
# Load more results to maximize the scraping
def load_more():
try:
more_results = '//a[@class = "moreButton"]'
driver.find_element_by_xpath(more_results).click()
# Printing these notes during the program helps me quickly check what it is doing
print('sleeping.....')
sleep(randint(45,60))
except:
pass
def page_scrape():
"""This function takes care of the scraping part"""
xp_sections = '//*[@class="section duration"]'
sections = driver.find_elements_by_xpath(xp_sections)
sections_list = [value.text for value in sections]
section_a_list = sections_list[::2] # This is to separate the two flights
section_b_list = sections_list[1::2] # This is to separate the two flights
# if you run into a reCaptcha, you might want to do something about it
def page_scrape():
"""This function takes care of the scraping part"""
xp_sections = '//*[@class="section duration"]'
sections = driver.find_elements_by_xpath(xp_sections)
sections_list = [value.text for value in sections]
section_a_list = sections_list[::2] # This is to separate the two flights
section_b_list = sections_list[1::2] # This is to separate the two flights
# if you run into a reCaptcha, you might want to do something about it
def start_kayak(city_from, city_to, date_start, date_end):
"""City codes - it's the IATA codes!
Date format - YYYY-MM-DD"""
kayak = ('https://www.kayak.com/flights/' + city_from + '-' + city_to +
'/' + date_start + '-flexible/' + date_end + '-flexible?sort=bestflight_a')
driver.get(kayak)
sleep(randint(8,10))
# sometimes a popup shows up, so we can use a try statement to check it and close
city_from = input('From which city? ')
city_to = input('Where to? ')
date_start = input('Search around which departure date? Please use YYYY-MM-DD format only ')
date_end = input('Return when? Please use YYYY-MM-DD format only ')
# city_from = 'LIS'
# city_to = 'SIN'
# date_start = '2019-08-21'
# date_end = '2019-09-07'