Skip to content

Instantly share code, notes, and snippets.

@Tafkas Tafkas/packt2slack.py
Last active Oct 9, 2017

Embed
What would you like to do?
ebook monkey script for posting Packt free technology eBook to Slack
import json
import re
import requests
from lxml import html
# Root of the Packt site; page paths are appended to it.
BASEURL = 'https://www.packtpub.com'

# Headers sent with the scraping requests (the User-Agent identifies as Googlebot).
HEADER = {
    'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
}

# Slack destinations keyed by a free-form name. Each entry holds the channel
# to post to and the incoming-webhook URL used for posting.
# NOTE(review): the webhook URL is a credential; consider loading it from the
# environment or an untracked config file instead of committing it.
OUTPUT = {
    'pav': {
        'channel': "#ebook-monkey",
        'webhook': "https://hooks.slack.com/services/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",
    },
    # Example of an additional destination:
    # 'pav-family': {'channel': "#my-channel",
    #                'webhook': "https://hooks.slack.com/services/...."}
}

# Display name and avatar emoji used for the Slack posts.
SLACK_USERNAME = "ebook monkey"
SLACK_ICON_EMOJI = ":monkey_face:"
def fix_string(foo):
    """Return ``foo`` as a native ``str`` that is safe to pass to ``str.format``.

    On Python 2 the result is a UTF-8 encoded byte string; on Python 3 it is
    text.
    :param foo: a string or any object with a text representation
    :return: the native-string form of ``foo``
    """
    if isinstance(foo, str):
        # Already a native string. On Python 2 this is a byte string; calling
        # .encode() on it would first decode it as ASCII and raise
        # UnicodeDecodeError for any non-ASCII byte, so return it untouched.
        return foo
    if isinstance(foo, bytes):
        # Only reachable on Python 3 (on Python 2 ``bytes`` is ``str``).
        return foo.decode('utf8')
    try:
        to_text = unicode  # Python 2 text type
    except NameError:
        return str(foo)  # Python 3: str is already the text type
    return to_text(foo).encode('utf8')
def fetch_isbn13(detail_page_url):
    """Look up a book's ISBN13 on its Packt detail page
    :param detail_page_url: the url of the Packt book detail page
    :return: text of the first element whose ``itemprop`` attribute includes
        the token "isbn" (an IndexError is raised when there is none)
    """
    isbn_xpath = '//*[contains(concat( " ", @itemprop, " " ), concat( " ", "isbn", " " ))]'
    response = requests.get(detail_page_url, headers=HEADER)
    document = html.fromstring(response.text)
    isbn_nodes = document.xpath(isbn_xpath)
    return isbn_nodes[0].text
# Patterns used to pull data out of the Amazon search-result markup.
_RATING_RE = re.compile(r'(\d+(\.\d+)?)')
_ISBN10_RE = re.compile(r'\d{10}')
# Amazon-specific ASIN ("B" + 9 characters) or an ISBN-10 ending in a digit or "X".
_ASIN_RE = re.compile(r'B[0-9]{2}[0-9A-Z]{7}|[0-9]{9}[0-9X]')


def _extract_asin(href):
    """Return the ASIN found in a product link, or None when there is none."""
    # A plain 10-digit run is tried first, then the broader ASIN pattern.
    match = _ISBN10_RE.search(href) or _ASIN_RE.search(href)
    return match.group(0) if match else None


def fetch_amazon_rating(isbn13):
    """Fetches the Amazon rating and ASIN for a book from the Amazon search page

    The lookup is best effort: when the request fails or the page does not
    contain the expected elements, the affected values are returned as None
    instead of raising.
    :param isbn13: An ISBN13 number
    :return: tuple ``(asin, amazon_rating, number_of_ratings)``; ``asin`` is a
        string or None, ``amazon_rating`` a float or None, and
        ``number_of_ratings`` is currently always None
    """
    url = '''https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords={}'''.format(isbn13)
    try:
        with requests.Session() as session:
            # Load the landing page first so the search request carries its cookies.
            session.get('https://www.amazon.com')
            response = session.get(url, headers=HEADER)
        if response.status_code == 503:
            response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print("oops something unexpected happened: {}".format(e))
        return None, None, None

    tree = html.fromstring(response.text)

    # Rating: the first "a-icon-alt" element whose text contains a number.
    rating_nodes = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-icon-alt", " " ))]')
    amazon_rating = None
    for node in rating_nodes:
        match = _RATING_RE.search(node.text or '')  # .text is None for empty elements
        if match:
            amazon_rating = float(match.group(0))
            break

    # The number of ratings is not extracted from the page.
    number_of_ratings = None

    # ASIN (either the ISBN-10 or an Amazon-specific id) from the first result link.
    links = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-link-normal a-text-normal", " " ))]')
    asin = _extract_asin(links[0].get('href', '')) if links else None

    return asin, amazon_rating, number_of_ratings
def _build_amazon_attachment(title, asin, amazon_rating):
    """Build the "Amazon Rating" Slack attachment, or None when no ASIN is known."""
    if asin is None:
        # Without an ASIN the product link would point at ".../dp/None".
        return None
    if amazon_rating is not None:
        stars = ':star:' * int(round(amazon_rating, 0))
        amazon_text = ("{title} has been rated \n {rating} out of 5 stars {stars} on "
                       "<https://www.amazon.de/dp/{asin}?tag=de125725875-21|Amazon.com>."
                       .format(title=title,
                               rating=amazon_rating,
                               stars=stars,
                               asin=asin))
    else:
        amazon_text = ("{title} has not been rated on "
                       "<https://www.amazon.de/dp/{asin}?tag=de125725875-21|Amazon.com> yet."
                       .format(title=title,
                               asin=asin))
    return {
        "title": "Amazon Rating",
        "text": amazon_text
    }


def fetch_ebook():
    """Fetches the Packt free ebook of the day, enriches the output with information from Amazon and posts it to Slack

    One message is posted to every destination in ``OUTPUT``. The Amazon
    attachment is left out when no ASIN could be determined. A response with a
    status other than 200 is printed.
    """
    free_book_url = BASEURL + '/packt/offers/free-learning'
    page = requests.get(free_book_url, headers=HEADER)
    tree = html.fromstring(page.text)

    # ``.text`` is None when an element has no leading text, hence the ``or ''``.
    title_node = tree.xpath('//*[(@id = "deal-of-the-day")]//h2')[0]
    title = (title_node.text or '').strip()
    description_node = tree.xpath('''//*[(@id = "deal-of-the-day")]
        //div[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]''')[0]
    description = (description_node.text or '').strip()

    cover_image = tree.xpath('''//*[contains(concat( " ", @class, " " ),
        concat( " ", "imagecache-dotd_main_image", " " ))]''')
    image_src = cover_image[0].attrib['src']
    if image_src.startswith('//'):
        # Protocol-relative URL: pin it to https.
        image_src = 'https:' + image_src
    image_url = fix_string(image_src).replace(' ', '%20')

    detail_page = (
        tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "dotd-main-book-image", " " ))]//a''')
        [0].attrib['href'])
    isbn13 = fetch_isbn13(BASEURL + detail_page)
    asin, amazon_rating, number_of_ratings = fetch_amazon_rating(isbn13)

    safe_title = fix_string(title)
    attachments = [{
        "title": "Free ebook today: {title}".format(title=safe_title),
        "title_link": free_book_url,
        "text": description,
        "image_url": image_url,
    }]
    amazon_attachment = _build_amazon_attachment(safe_title, asin, amazon_rating)
    if amazon_attachment is not None:
        attachments.append(amazon_attachment)

    for target in OUTPUT.values():
        payload = {
            "channel": target['channel'],
            "username": SLACK_USERNAME,
            "icon_emoji": SLACK_ICON_EMOJI,
            "attachments": attachments,
        }
        # The body is JSON, so it is sent with the JSON content type.
        r = requests.post(target['webhook'],
                          data=json.dumps(payload),
                          headers={"content-type": "application/json"})
        if r.status_code != 200:
            print(r)
# Run the fetch-and-post job when the file is executed as a script.
if __name__ == '__main__':
    fetch_ebook()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.