Skip to content

Instantly share code, notes, and snippets.

@evmn
Last active February 16, 2022 09:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save evmn/e6608696a8e3a8724987f6fab534f2d6 to your computer and use it in GitHub Desktop.
Save evmn/e6608696a8e3a8724987f6fab534f2d6 to your computer and use it in GitHub Desktop.
Calibre Recipe For <WEBVISION: The Organization of the Retina and Visual System>. First rename Webvision.py to Webvision.recipe
#!/usr/bin/env python
# encoding: utf-8
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from datetime import datetime
# Front page of the Webvision site; parse_index() loads this URL to read the
# table of contents.
base_url = 'https://webvision.med.utah.edu/'
class Web_Vision(BasicNewsRecipe):
    """Calibre recipe for the Webvision online textbook.

    The table of contents is scraped from the site's front page: every entry
    whose title starts with "Part " opens a new section, and the entries that
    follow it become that section's articles.
    """

    # NOTE(review): `re` is used below but is not imported in this file;
    # presumably calibre's recipe loader supplies it -- confirm, or add
    # `import re` next to the other imports.

    title = 'WEBVISION: The Organization of the Retina and Visual System'
    cover_url = 'http://webvision.med.utah.edu/wp-content/uploads/2011/05/Titlez1.jpg'
    __author__ = ''
    language = 'en'
    encoding = 'utf-8'
    timefmt = ''

    # Markers bounding the part of each page that is kept.
    # Disabled alternative start marker:
    # remove_tags_before = dict(attrs={'class':'entry-content'})
    remove_tags_before = dict(attrs={'class': 'entry-header'})
    remove_tags_after = dict(attrs={'class': 'entry-content'})

    extra_css = 'h1{text-align:center}'

    # (pattern, replacement) pairs: drop empty `&nbsp;` paragraphs and `<br>`
    # tags, and cut a trailing " by <author>" out of the <h1> heading.
    preprocess_regexps = [
        (re.compile(u'<p>&nbsp;</p>'), u""),
        (re.compile(u'<br>'), u""),
        # Disabled clean-ups, kept for reference:
        #(re.compile(u'<p style="text-align: center;"><a href="http://uuhsc.utah.edu/MoranEyeCenter/research/faculty/emeritus/helga_kolb.htm" target="_blank" rel="noopener">Helga Kolb</a></p>'), u""),
        #(re.compile(u'<p>(About)? the authors?</p>.*</div>', re.DOTALL | re.IGNORECASE), u"</div>"),
        #(re.compile(u'<p[^>]*?>Last Updated?: .*</div>', re.DOTALL), u"</div>"),
        (re.compile(u' by .*?</h1>', re.IGNORECASE), u"</h1>"),
    ]

    no_stylesheets = True
    resolve_internal_links = True
    remove_javascript = True
    auto_cleanup = False

    delay = 1
    simultaneous_downloads = 5
    oldest_article = 999
    max_articles_per_feed = 999

    def parse_index(self):
        """Build the section/article list from the site's front page.

        Returns:
            A list of ``(section_title, articles)`` tuples in page order,
            where ``articles`` is a list of ``{'title': ..., 'url': ...}``
            dicts.

        Raises:
            ValueError: if the front page contains no
                ``<ul class="chapter-children">`` element.
        """
        soup = self.index_to_soup(base_url)
        toc = soup.find('ul', {'class': 'chapter-children'})
        if toc is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError(
                'No <ul class="chapter-children"> found at ' + base_url)

        part_heading = re.compile("Part ")
        excluded = re.compile("How the Retina Works")
        ignore_pt = re.compile("(Part [IVX:]* )|( by .*)", re.IGNORECASE)
        author_pt = re.compile("( by .*)", re.IGNORECASE)

        feeds = []
        section = []
        sec_title = ""
        # Title/URL of the "Part" entry that opened the current section; used
        # as the section's only article when the part lists no chapters.
        part_title = ""
        part_url = ""

        def part_as_article():
            # The part's own page, with the "Part X: " prefix and the
            # " by ..." suffix removed from its title.
            return {'title': ignore_pt.sub("", part_title), 'url': part_url}

        for entry in toc.findAll('li', {'class': 'page_item'}):
            link = entry.find('a')
            url = link.get('href') if link is not None else None
            if not url:
                # A list item without a usable link cannot become an article.
                continue
            title = link.getText()
            print(title)

            if part_heading.match(title):
                # Close the previous section. The length check skips whatever
                # was collected before the first "Part" entry (sec_title is
                # still empty then).
                if len(sec_title) > 5:
                    if not section:
                        section.append(part_as_article())
                    feeds.append((sec_title, section))
                sec_title = author_pt.sub("", title)
                part_title = title
                part_url = url
                section = []
            elif not excluded.match(title):
                section.append({'title': ignore_pt.sub("", title), 'url': url})

        # Close the last section with the same fallback as the ones before it,
        # and never emit a section that has no articles.
        if len(sec_title) > 5 and not section:
            section.append(part_as_article())
        if section:
            feeds.append((sec_title, section))

        for sec in feeds:
            print(sec[0])
            for item in sec[1]:
                print(" ", item['title'])
        return feeds
@evmn
Copy link
Author

evmn commented Feb 16, 2022

# Convert the recipe to a .mobi file, setting the authors, title, publication
# date and publisher explicitly and using the kindle_pw3 output profile.
ebook-convert "Webvision.recipe" .mobi \
		--authors="Helga Kolb & Ralph Nelson & Eduardo Fernandez & Bryan Jones" \
		--title="WEBVISION: The Organization of the Retina and Visual System" \
		--pubdate="2022-02-17" \
		--publisher="Webvision" \
		--output-profile=kindle_pw3 \
		--mobi-file-type=new \
		-vv

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment