Skip to content

Instantly share code, notes, and snippets.

@TrishGillett
Last active February 4, 2016 01:00
Show Gist options
  • Save TrishGillett/c4a1f3c9deba57875a0d to your computer and use it in GitHub Desktop.
Save TrishGillett/c4a1f3c9deba57875a0d to your computer and use it in GitHub Desktop.
Checking the Packt Publishing free ebook of the day with Selenium
# -*- coding: utf-8 -*-
"""
author: Trish Gillett (discardthree@gmail.com, @discardthree on github)
Basic scraper to check the Packt Publishing Free ebook of the day.
This version uses selenium because when I tried getting the source via the
requests package it sometimes seemed to return the source for a version of
the website that was different from the one that was live.
Adapted from this code which was used by Estela Alvarez (supita@gmail.com)
to demo webscraping at a Montreal Pyladies meeting:
https://github.com/supita/Pyladies-Python-Web-Scraping
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
def get_webdriver(user_agent="some UA string"):
    """Start a maximized Firefox WebDriver with an overridden user agent.

    Args:
        user_agent: value stored in Firefox's ``general.useragent.override``
            preference. Defaults to the placeholder string this script
            has always used, so existing callers are unaffected.

    Returns:
        The ``webdriver.Firefox`` instance. The caller is responsible for
        shutting it down.
    """
    profile = webdriver.FirefoxProfile()
    profile.set_preference("general.useragent.override", user_agent)
    browser = webdriver.Firefox(profile)
    browser.maximize_window()
    return browser
def check_dotd(browser):
    """Load the Packt free-learning page and announce the book of the day.

    Waits up to 3 seconds for an element with class ``dotd-title`` to be
    present, then hands the page source to ``announce_book_of_the_day``.
    If the element does not show up in time, a failure message is printed.

    Args:
        browser: a Selenium WebDriver. It is left open on success and on
            failure alike; shutting it down is the caller's job.
    """
    # Local import: TimeoutException is not in the file-level import list.
    from selenium.common.exceptions import TimeoutException

    browser.get('https://www.packtpub.com/packt/offers/free-learning/')
    try:
        WebDriverWait(browser, 3).until(
            EC.presence_of_element_located((By.CLASS_NAME, "dotd-title")))
    except TimeoutException:
        # Only the expected "title never appeared" case is handled here.
        # The browser is deliberately NOT closed: the caller owns it, and
        # closing it here would make the caller's own close fail.
        print("Couldn't find out the book of the day, sorry!")
        return
    announce_book_of_the_day(browser.page_source)
def announce_book_of_the_day(page_source):
    """Print the title of today's free Packt ebook found in *page_source*.

    The title is read from the first child of the ``<h2>`` inside the
    ``<div class="dotd-title">`` element. If it cannot be extracted, an
    apology is printed instead of raising.

    Args:
        page_source: HTML of the free-learning page.
    """
    try:
        soup = BeautifulSoup(page_source, "html.parser")
        book_header = soup.find("div", {"class": "dotd-title"}).h2
        book_title = book_header.contents[0].strip()
    except (AttributeError, IndexError, TypeError):
        # AttributeError: the div or its h2 is missing (lookup gave None).
        # IndexError: the h2 has no children.
        # TypeError: unusable input, or the first child is not plain text.
        print("Couldn't find out the book of the day, sorry!")
        return
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Today's free book at Packt is: " + book_title)
    print("Go to https://www.packtpub.com/packt/offers/free-learning/ to see for yourself!")
def main():
    """Start a browser, report the free book of the day, then shut down."""
    browser = get_webdriver()
    try:
        check_dotd(browser)
    finally:
        # Always release the browser, even if the check raised.
        # quit() ends the whole WebDriver session (every window plus the
        # driver), whereas close() only closes the current window.
        browser.quit()
# Run the check only when this file is executed as a script, so that
# importing the module does not launch a browser.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment