Skip to content

Instantly share code, notes, and snippets.

@Tafkas
Last active October 9, 2017 15:36
Show Gist options
  • Save Tafkas/3ca874700d589107b9c04619fd73f9eb to your computer and use it in GitHub Desktop.
Save Tafkas/3ca874700d589107b9c04619fd73f9eb to your computer and use it in GitHub Desktop.
ebook monkey: a script that posts Packt's free technology eBook of the day to Slack
import json
import re
import requests
from lxml import html
# Root of the Packt site; page paths are appended to it.
BASEURL = 'https://www.packtpub.com'

# Request headers sent with the scraping requests (crawler User-Agent).
HEADER = {
    'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
}

# Slack destinations, keyed by an arbitrary name. Each entry provides the
# target channel and the incoming-webhook URL the message is posted to.
# NOTE(review): a webhook URL is effectively a credential -- consider loading
# it from configuration or the environment rather than keeping it in source.
OUTPUT = {
    'pav': {
        'channel': "#ebook-monkey",
        'webhook': "https://hooks.slack.com/services/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",
    },
    # 'pav-family': {'channel': "#my-channel",
    #                'webhook': "https://hooks.slack.com/services/...."}
}

# Display name and avatar emoji used for the Slack messages.
SLACK_USERNAME = "ebook monkey"
SLACK_ICON_EMOJI = ":monkey_face:"
def fix_string(foo):
    """Converts a value to the interpreter's native ``str`` type

    The result can be passed safely to ``str.format`` on either major Python
    version: on Python 2 it is a UTF-8 encoded byte string, on Python 3 it is
    ordinary text.

    :param foo: any value (text, bytes or an arbitrary object)
    :return: the value as a native ``str``
    """
    if str is bytes:
        # Python 2: the native str type is a byte string.
        if isinstance(foo, str):
            # Already encoded. Calling .encode() on a byte string would first
            # decode it as ASCII and raise UnicodeDecodeError on non-ASCII data.
            return foo
        if not isinstance(foo, unicode):  # noqa: F821 - Python 2 builtin
            foo = unicode(foo)  # noqa: F821 - Python 2 builtin
        return foo.encode('utf8')
    # Python 3: the native str type is text, so only raw bytes need decoding.
    if isinstance(foo, bytes):
        return foo.decode('utf8')
    return str(foo)
def fetch_isbn13(detail_page_url):
    """Fetches the ISBN13 number from the Packt Book detail page

    :param detail_page_url: the url of the packt book detail page
    :return: the ISBN13 of the book, i.e. the text of the first element whose
        ``itemprop`` attribute contains the token ``isbn``
    :raises IndexError: if the page contains no such element
    :raises requests.exceptions.Timeout: if the server does not answer in time
    """
    # Without a timeout a stalled connection would block the script forever.
    page = requests.get(detail_page_url, headers=HEADER, timeout=30)
    tree = html.fromstring(page.text)
    isbn_nodes = tree.xpath('//*[contains(concat( " ", @itemprop, " " ), concat( " ", "isbn", " " ))]')
    if not isbn_nodes:
        # Same exception type as the former bare ``[0]``, but with a message
        # that says what was missing and where.
        raise IndexError('no ISBN element found on {}'.format(detail_page_url))
    return isbn_nodes[0].text
def _parse_rating(texts):
    """Returns the first number found in the given texts as a float, or None."""
    rating_regex = re.compile(r'(\d+(\.\d+)?)')
    for text in texts:
        # Elements without direct text yield None, which re cannot search.
        match = rating_regex.search(text or '')
        if match:
            return float(match.group(0))
    return None


def _extract_asin(href):
    """Returns the ASIN (or ISBN-10) embedded in a product link, or None."""
    match = re.search(r'\d{10}', href)
    if match is None:
        # Amazon-specific ASINs ("B" + 9 alphanumerics) and ISBN-10s whose
        # check digit is "X". The previous pattern ended in "(X|0-9])", which
        # only matched the literal text "0-9]" instead of a digit.
        match = re.search(r'(B[0-9]{2}[0-9A-Z]{7}|[0-9]{9}[0-9X])', href)
    return match.group(0) if match else None


def fetch_amazon_rating(isbn13):
    """Fetches the Amazon Rating, number of ratings and ASIN from Amazon

    The Amazon home page is requested first so that its cookies can be sent
    along with the search request for the ISBN. A 503 answer to the search is
    reported on stdout; the function then carries on with whatever was
    returned, which normally yields ``None`` values.

    :param isbn13: An ISBN13 number
    :return: tuple ``(asin, amazon_rating, number_of_ratings)``; ``asin`` is a
        string or None, ``amazon_rating`` a float or None, and
        ``number_of_ratings`` is currently always None (its scraping is
        disabled)
    """
    timeout = 30  # seconds; without it a stalled connection hangs forever
    url = '''https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords={}'''.format(isbn13)
    s = requests.Session()
    try:
        response = s.get('https://www.amazon.com', timeout=timeout)
        cookies = dict(response.cookies)
        response = requests.get(url, headers=HEADER, cookies=cookies, timeout=timeout)
        if response.status_code == 503:
            response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        print("oops something unexpected happened: {}".format(e))
    finally:
        s.close()

    tree = html.fromstring(response.text)

    # get rating
    rating_nodes = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-icon-alt", " " ))]')
    amazon_rating = _parse_rating(node.text for node in rating_nodes)

    # get number of ratings (not scraped at the moment)
    number_of_ratings = None

    # get asin (is either isbn10 or amazon specific)
    links = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-link-normal a-text-normal", " " ))]')
    asin = None
    if links:
        # No result link (e.g. an error page or an unknown book) used to
        # raise IndexError here; it now simply yields no ASIN.
        asin = _extract_asin(links[0].attrib.get('href', ''))

    return asin, amazon_rating, number_of_ratings
def _amazon_summary(title, asin, amazon_rating):
    """Builds the Slack text describing the book's Amazon rating."""
    if amazon_rating is not None:
        stars = ':star:' * int(round(amazon_rating, 0))
        return ("{title} has been rated \n {rating} out of 5 stars {stars} on "
                "<https://www.amazon.de/dp/{asin}?tag=de125725875-21|Amazon.com>."
                .format(title=fix_string(title),
                        rating=amazon_rating,
                        stars=stars,
                        asin=asin))
    return ("{title} has not been rated on "
            "<https://www.amazon.de/dp/{asin}?tag=de125725875-21|Amazon.com> yet."
            .format(title=fix_string(title),
                    asin=asin))


def fetch_ebook():
    """Fetches the Packt free ebook of the day, enriches the output with information from Amazon and posts it to Slack

    The title, description, cover image and detail-page link are scraped from
    the free-learning page. The ISBN found on the detail page is used to look
    the book up on Amazon. One message is posted to every entry in ``OUTPUT``;
    a response status other than 200 is printed to stdout. The Amazon
    attachment is left out when no ASIN could be determined, because the
    product link would otherwise point to ``/dp/None``.
    """
    timeout = 30  # seconds; without it a stalled connection hangs forever
    free_book_url = BASEURL + '/packt/offers/free-learning'
    page = requests.get(free_book_url, headers=HEADER, timeout=timeout)
    tree = html.fromstring(page.text)

    title = tree.xpath('//*[(@id = "deal-of-the-day")]//h2')[0].text.strip()

    description_node = tree.xpath(
        '//*[(@id = "deal-of-the-day")]'
        '//div[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]')[0]
    # .text is None when the div starts with a child element.
    description = (description_node.text or '').strip()

    cover_image = tree.xpath(
        '//*[contains(concat( " ", @class, " " ), '
        'concat( " ", "imagecache-dotd_main_image", " " ))]')
    # Dropping the first character of src and prefixing "https:/" produces an
    # absolute https URL when src is protocol-relative ("//host/path").
    image_url = 'https:/{}'.format(fix_string(cover_image[0].attrib['src'][1:])).replace(' ', '%20')

    detail_page = tree.xpath(
        '//*[contains(concat( " ", @class, " " ), '
        'concat( " ", "dotd-main-book-image", " " ))]//a')[0].attrib['href']

    isbn13 = fetch_isbn13(BASEURL + detail_page)
    asin, amazon_rating, number_of_ratings = fetch_amazon_rating(isbn13)

    # The attachments are the same for every destination, so build them once.
    attachments = [{
        "title": "Free ebook today: {title}".format(title=fix_string(title)),
        "title_link": free_book_url,
        "text": description,
        "image_url": image_url,
    }]
    if asin is not None:
        attachments.append({
            "title": "Amazon Rating",
            "text": _amazon_summary(title, asin, amazon_rating),
        })

    for target in OUTPUT.values():
        payload = {
            "channel": target['channel'],
            "username": SLACK_USERNAME,
            "icon_emoji": SLACK_ICON_EMOJI,
            "attachments": attachments,
        }
        # The body is JSON, so declare it as such.
        r = requests.post(target['webhook'],
                          data=json.dumps(payload),
                          headers={"content-type": "application/json"},
                          timeout=timeout)
        if r.status_code != 200:
            print(r)
# Run the scrape-and-post job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    fetch_ebook()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment