Skip to content

Instantly share code, notes, and snippets.

@bootell
Created April 21, 2016 06:25
Show Gist options
  • Save bootell/df3b797905947c4679a8f071d93c2c4a to your computer and use it in GitHub Desktop.
Save bootell/df3b797905947c4679a8f071d93c2c4a to your computer and use it in GitHub Desktop.
tuicool magazines for kindleEar
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
from bs4 import BeautifulSoup
from base import BaseFeedBook, URLOpener, feedparser, string_of_tag
def getBook():
    """Return the book class defined by this module."""
    return tuicoolMags
class tuicoolMags(BaseFeedBook):
    """Feed book for the tuicool weekly programming magazine.

    The newest issue is located through the magazine's RSS feed; that
    issue's page is then scraped for its sections and article links.
    """

    title = u'编程狂人'
    description = u'《编程狂人》周刊是推酷网专为IT人打造的行业技术周刊。'
    language = 'zh-cn'
    feed_encoding = "utf-8"
    page_encoding = "utf-8"
    mastheadfile = "mh_tuicoolMags.gif"
    coverfile = 'cv_tuicoolMags.jpg'
    deliver_days = ['Friday']

    def ParseFeedUrls(self):
        """Collect the article links of the latest magazine issue.

        Returns:
            A list of ``(section_title, article_title, url, None)`` tuples,
            one per distinct article URL, in page order. The list is empty
            when the RSS feed or the issue page cannot be fetched, or when
            no article link is found.
        """
        # Look up the address of the latest issue from the RSS feed.
        rssurl = 'http://www.tuicool.com/mags/rss_programming.rss'
        feedresult = feedparser.parse(rssurl)
        entries = getattr(feedresult, 'entries', None) if feedresult else None
        if not entries:
            # Covers both a failed fetch and a feed without any entry, so
            # the entries[0] lookup below cannot raise IndexError.
            self.log.warn('fetch web failed:%s' % rssurl)
            return []
        mainurl = entries[0]['link']

        # Download the page of the latest issue.
        opener = URLOpener(self.host, timeout=30)
        result = opener.open(mainurl)
        if result.status_code != 200:
            self.log.warn('fetch web failed:%s' % mainurl)
            return []
        content = result.content.decode(self.feed_encoding)

        urls = []
        urladded = set()  # URLs already emitted, to skip duplicates
        # Each section is cut out of the page as the span running from a
        # <strong> heading to the end of the next </ol>.
        section_pattern = re.compile(r"<strong>[\s\S]*?</ol>")
        for section in section_pattern.findall(content):
            # Query the parsed tree, not the raw string: str.find() returns
            # an index and str has no find_all().
            soup = BeautifulSoup(section, "lxml")
            strong = soup.find('strong')
            if strong is None:
                self.log.warn('strong is empty')
                continue
            sectitle = string_of_tag(strong).strip()
            if not sectitle:
                self.log.warn('strong string is empty')
                continue

            for node in soup.find_all(class_='article-title'):
                a = node.find('a', attrs={"href": True})
                if a is None:
                    continue
                url = a['href']
                title = string_of_tag(a)
                if not title or url in urladded:
                    continue
                urls.append((sectitle, title, url, None))
                urladded.add(url)

        if not urls:
            self.log.warn('len of urls is zero.')
        return urls
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment