Skip to content

Instantly share code, notes, and snippets.

@noahpryor
Created December 11, 2018 02:29
Show Gist options
  • Save noahpryor/3d7543f3f77380957751592d1bb9936e to your computer and use it in GitHub Desktop.
Save noahpryor/3d7543f3f77380957751592d1bb9936e to your computer and use it in GitHub Desktop.
Houston Chronicle recipe for calibre
import string, re
from collections import OrderedDict
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
# Site root; parse_index prefixes each scraped article href with it.
base_url = "https://www.houstonchronicle.com"

# (feed title, site path) pairs, one per section index page that
# parse_index downloads and scans for headlines.
sections = [
    ('World', '/us-world/world/'),
    ('US', '/us-world/us/'),
    ('Texas', '/local/texas/'),
    ('Houston', '/local/houston/'),
    ('Opinion', '/opinion/editorials/'),
]
class HoustonChroniclePaid(BasicNewsRecipe):
    """Calibre recipe that builds feeds from Houston Chronicle section pages.

    Each entry of the module-level ``sections`` list becomes one feed whose
    articles are the headline links found on that section's index page.
    """

    title = 'Houston Chronicle'
    __author__ = 'Noah Pryor'
    description = 'Daily news from HoustonChronicle.com'
    timefmt = '%Y-%m-%d'
    needs_subscription = False
    # Tag filters for article pages: the headline <h1> and the body <section>.
    keep_only_tags = [
        dict(name='h1', attrs={'itemprop': ['headline']}),
        dict(name='section', attrs={'class': ['body']}),
    ]
    no_stylesheets = True
    # extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'

    # NOTE: a commented-out get_browser() override with a username/password
    # login used to sit here. It posted to https://www.nytimes.com/auth/login
    # rather than to this site and was disabled (needs_subscription is
    # False), so it has been dropped instead of kept as dead code.

    def parse_index(self):
        """Scrape every configured section page into a calibre feed list.

        For each ``(section title, path)`` pair in ``sections`` the page at
        ``base_url + path`` is fetched, and every ``<h2 class="headline">``
        inside ``<div id="content">`` that contains a link yields one article.

        Returns:
            A list of ``(section title, articles)`` tuples, where ``articles``
            is a list of dicts with the keys ``title``, ``date``, ``url`` and
            ``description``. A section whose page has no content container
            or no usable headlines is still returned, with an empty list.
        """
        feeds = []
        for section, path in sections:
            self.log('starting parse_index: ' + section)
            articles = []
            soup = self.index_to_soup(base_url + path)

            # The content container may be missing (layout change, error
            # page); treat that as "no headlines" instead of crashing the run.
            content = soup.find("div", {'id': 'content'})
            if content is None:
                self.log('no content container found for section: ' + section)
                headlines = []
            else:
                headlines = content.findAll("h2", {'class': "headline"})

            for h2 in headlines:
                a = h2.find("a")
                href = a.get('href') if a is not None else None
                if not href:
                    # Headline without a usable link: nothing to download.
                    continue
                # Only site-relative hrefs need the site root prepended;
                # absolute links are used as they are.
                if href.startswith(('http://', 'https://')):
                    url = href
                else:
                    url = base_url + href
                articles.append({
                    'title': self.tag_to_string(a),
                    'date': '-',
                    'url': url,
                    'description': '',
                })
            feeds.append((section, articles))
        self.log(feeds)
        return feeds
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment