Last active
February 4, 2016 01:00
-
-
Save TrishGillett/c4a1f3c9deba57875a0d to your computer and use it in GitHub Desktop.
Checking the Packt Publishing free ebook of the day with Selenium
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
author: Trish Gillett (discardthree@gmail.com, @discardthree on GitHub)

Basic scraper to check the Packt Publishing free ebook of the day.

This version uses Selenium because, when I tried getting the source via the
requests package, it sometimes seemed to return the source for a version of
the website that was different from the one that was live.

Adapted from this code, which was used by Estela Alvarez (supita@gmail.com)
to demo web scraping at a Montreal PyLadies meeting:
https://github.com/supita/Pyladies-Python-Web-Scraping
"""
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def get_webdriver():
    """Create a Firefox WebDriver with an overridden user agent.

    A new Firefox profile is built, its ``general.useragent.override``
    preference is set, and the browser launched from that profile is
    maximized before being handed back.

    Returns:
        The started Firefox WebDriver instance.
    """
    firefox_profile = webdriver.FirefoxProfile()
    firefox_profile.set_preference(
        "general.useragent.override",
        "some UA string",
    )
    driver = webdriver.Firefox(firefox_profile)
    driver.maximize_window()
    return driver
def check_dotd(browser):
    """Load the free-learning page and announce the book of the day.

    Navigates ``browser`` to the Packt free-learning offer page, waits up to
    three seconds for an element with class ``dotd-title`` to be present,
    and passes the resulting page source to ``announce_book_of_the_day``.

    If the wait or the page-source read fails with a WebDriver error, a
    short apology is printed instead. The browser is NOT closed here: its
    lifecycle belongs to the caller, which avoids closing the same window
    twice.

    Args:
        browser: A Selenium WebDriver instance to drive.
    """
    browser.get('https://www.packtpub.com/packt/offers/free-learning/')
    try:
        WebDriverWait(browser, 3).until(
            EC.presence_of_element_located((By.CLASS_NAME, "dotd-title"))
        )
        source = browser.page_source
    except WebDriverException:
        # TimeoutException (raised by the wait) is a WebDriverException
        # subclass, so this covers both a missing element and a failed read.
        print("Couldn't find out the book of the day, sorry!")
        return
    announce_book_of_the_day(source)
def announce_book_of_the_day(page_source):
    """Print the title of the free book found in ``page_source``.

    The HTML is parsed with BeautifulSoup; the title is taken from the first
    child of the ``<h2>`` inside the ``<div class="dotd-title">`` element.
    When that structure is missing or not shaped as expected, an apology is
    printed instead of a title.

    Args:
        page_source: HTML source of the free-learning page.
    """
    try:
        soup = BeautifulSoup(page_source, "html.parser")
        book_header = soup.find("div", {"class": "dotd-title"}).h2
        book_title = book_header.contents[0].strip()
    except (AttributeError, IndexError, TypeError):
        # AttributeError: the div or its h2 was not found (None returned).
        # IndexError: the h2 has no children.
        # TypeError: unusable input, or the first child is not a text node.
        print("Couldn't find out the book of the day, sorry!")
        return
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Today's free book is: " + book_title)
    print("Go to https://www.packtpub.com/packt/offers/free-learning/ to see for yourself!")
def main():
    """Start a browser, check the book of the day, and always shut down.

    The check runs inside ``try``/``finally`` so the browser is released
    even when ``check_dotd`` raises (for example if navigation fails).
    """
    browser = get_webdriver()
    try:
        check_dotd(browser)
    finally:
        # quit() ends the whole WebDriver session, not just the current
        # window, so no browser/driver process is left behind.
        browser.quit()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment