An example of creating a "news recipe" for Calibre to download news stories and convert them to MOBI or EPUB.
Last active
February 20, 2021 13:57
-
-
Save chrislkeller/3040968 to your computer and use it in GitHub Desktop.
Example of Calibre news download recipe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string, re | |
from calibre import strftime | |
from calibre.web.feeds.recipes import BasicNewsRecipe | |
from calibre.ebooks.BeautifulSoup import BeautifulSoup | |
class AdvancedUserRecipe1334031382(BasicNewsRecipe):
    """Example Calibre news recipe built purely from class-level settings.

    The class defines no methods; everything it does comes from the
    attributes below, which are read by the ``BasicNewsRecipe`` base class.
    The ``<name>`` and ``<feed>`` strings look like placeholders left for
    the user to fill in with a real title and feed URL.
    """

    # --- What to download -------------------------------------------------
    title = u'<name>'
    # One (feed title, feed URL) pair.
    feeds = [(u'<name>', u'<feed>')]

    # --- How much to download ---------------------------------------------
    # NOTE(review): per the calibre recipe API this is an age limit in days
    # -- confirm against the BasicNewsRecipe documentation.
    oldest_article = 400
    max_articles_per_feed = 150

    # --- Page clean-up ----------------------------------------------------
    no_stylesheets = True
    remove_javascript = True

    # Both bounds point at the element with id="story", so only that
    # element is meant to survive the trimming.
    remove_tags_before = {'id': 'story'}
    remove_tags_after = {'id': 'story'}

    # Elements dropped from the page: sharing widgets, gallery controls,
    # related-content boxes, tags, comments and third-party widgets.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'share clearfix'}},
        {'name': 'div', 'attrs': {'class': 'publish-meta clearfix'}},
        {'name': 'div', 'attrs': {'class': 'modal-header'}},
        {'name': 'a', 'attrs': {'class': 'buttons prev'}},
        {'name': 'div', 'attrs': {'class': 'viewport'}},
        {'name': 'a', 'attrs': {'class': 'buttons next'}},
        {'name': 'div', 'attrs': {'class': 'box related-images'}},
        {'name': 'div', 'attrs': {'class': 'caption round4-bottom'}},
        {'name': 'div', 'attrs': {'class': 'magnify'}},
        {'name': 'div', 'attrs': {'class': 'box related-links'}},
        {'name': 'div', 'attrs': {'class': 'tags clearfix'}},
        {'name': 'div', 'attrs': {'class': 'view-comments'}},
        {'name': 'div', 'attrs': {'id': 'comment-container'}},
        {'name': 'div', 'attrs': {'class': 'related-block stories clearfix'}},
        {'name': 'div', 'attrs': {'class': 'related-block clearfix'}},
        {'name': 'div', 'attrs': {'class': 'OUTBRAIN'}},
    ]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment