Last active
June 8, 2021 16:23
-
-
Save kevindoran/29bfc50d0ad8cc83fae82869a60e4484 to your computer and use it in GitHub Desktop.
Export Anki decks as webpages (targeting Hugo)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import anki | |
import anki.collection | |
import anki.exporting | |
import datetime | |
import os | |
import re | |
import cssutils | |
import shutil | |
import logging | |
# Need to use absolute paths as the Anki exporter calls
# os.chdir() at various points.
# Anki collection database that is opened when this module is loaded.
COLLECTION_PATH = "/www/anki_data/collection.anki2"
# Directory that image files referenced by cards are copied from.
MEDIA_DIR = "/www/anki_data/collection.media"
# Root of the generated pages; each exported deck gets a sub-directory here.
OUT_DIR = "/www/src/content/anki/"
# Directory that the per-note-type stylesheets are written to.
CSS_DIR = "/www/src/themes/blank/static/css"
# NOTE(review): not referenced by the code visible in this file (cards are
# written as "<card id>/index.html") -- confirm whether it is still needed.
FILENAME_FORMAT = "{card_id}.html"
# Template for one generated card page: a front-matter section (title, date,
# lastmod, weight) followed by a stylesheet link and the card's two sides.
# It is filled in with str.format() using the keyword fields title, date,
# lastmod, weight, css, front and back. The front side is emitted visible
# (display:block) and the back side hidden (display:none).
RENDER_FORMAT = """
---
title: "{title}"
date: {date:%Y-%m-%d}
lastmod: {lastmod:%Y-%m-%d}
draft: false
type: ankimath
weight: {weight}
---
<link rel="stylesheet" href="/css/{css}.css">
<div class="anki-front" style="display:block;">
{front}
</div>
<div class="anki-back" style="display:none;">
{back}
</div>
"""
# Remember the working directory before the collection is opened so that it
# can be restored straight afterwards.
cwd = os.getcwd()
# Module-level collection handle shared by every function and exporter below.
col = anki.Collection(COLLECTION_PATH)
# Why does Anki change directory?
os.chdir(cwd)
def title(card):
    """Return the card's title, ready to sit inside a double-quoted value.

    The text comes from the note's 'Title' field; notes without such a
    field get the placeholder "(No title)". HTML tags are stripped and
    every backslash and double quote is prefixed with a backslash.
    """
    note = card.note()
    raw_title = note['Title'] if 'Title' in note else "(No title)"
    # Remove html tags that get into the titles. They don't display in
    # Hugo's title from page metadata.
    without_tags = re.sub('<[^<]+?>', '', raw_title)
    # Escape escape sequence (backslash and quote)
    return re.sub(r'([\\"])', r'\\\1', without_tags)
# Currently doesn't need to be any more complicated.
def url_encode(s):
    """Return *s* lower-cased with every space and underscore turned into '-'."""
    separators_to_dash = str.maketrans({' ': '-', '_': '-'})
    lowered = s.lower()
    return lowered.translate(separators_to_dash)
def creation_date(card):
    """Return the card id, read as a millisecond timestamp, as a local datetime.

    NOTE(review): this relies on card ids being creation times in epoch
    milliseconds -- confirm against the Anki version in use.
    """
    epoch_seconds = card.id / 1000
    return datetime.datetime.fromtimestamp(epoch_seconds)
def last_mod_date(card):
    """Return the modification time of the card's note as a local datetime."""
    # Not sure whether to use a card or note mod date.
    note = card.note()
    modified_at = note.mod
    return datetime.datetime.fromtimestamp(modified_at)
class DeckToSiteExporter:
    """Export one Anki deck as a directory of Hugo pages.

    For a deck, export() writes into OUT_DIR/<deck_shortname>:
      * an Anki package of the deck,
      * one "<card id>/index.html" page per non-suspended card, with the
        images the card references copied next to it,
    and writes one stylesheet per entry of NOTE_TYPES into CSS_DIR.
    """

    # Generalize this later if needed.
    NOTE_TYPES = ['Math Cloze', 'Idea_description_example',
                  'Cloze', 'Image Occlusion Enhanced', 'Basic_with_title',
                  'Basic', 'Math basic', 'Reversed (general)',
                  'Title only reverse']

    # Captures the src value of each <img> tag. "[^>]*?" keeps a match inside
    # a single tag, so several images on one line are all found, and src does
    # not have to be the last attribute. "\s" before src excludes attributes
    # such as data-src.
    IMG_SRC_PATTERN = re.compile(r'<img\b[^>]*?\ssrc="([^"]+)"', re.IGNORECASE)

    # Image sources starting with one of these are not files in MEDIA_DIR.
    NON_LOCAL_PREFIXES = ('http://', 'https://', '//', 'data:')

    def __init__(self, deck_name, deck_shortname):
        """Prepare an exporter.

        deck_name is the full Anki deck name used for lookups and searches;
        deck_shortname is the name of the output directory under OUT_DIR.
        """
        self.deck_dir = os.path.join(OUT_DIR, deck_shortname)
        self.deck_name = deck_name
        # Both are filled in by export().
        self.deck_id = None
        self.card_ids = None

    def export(self):
        """Look up the deck and its cards, then write package, CSS and pages."""
        self.deck_id = col.decks.id_for_name(self.deck_name)
        # Maybe filter by tags here like 'copyright'.
        self.card_ids = col.find_cards(f'deck:"{self.deck_name}" -is:suspended')
        self.write_package()
        self.write_css()
        self.write_cards()

    def card_dir(self, card):
        """Return the output directory of *card*: <deck_dir>/<card id>."""
        return os.path.join(self.deck_dir, str(card.id))

    def card_path(self, card):
        """Return the path of the page written for *card*."""
        return os.path.join(self.card_dir(card), 'index.html')

    def write_package(self):
        """Export the deck as an Anki package file inside the deck directory."""
        exporter = anki.exporting.AnkiPackageExporter(col)
        exporter.did = self.deck_id
        exporter.includeHTML = True
        exporter.includeTags = False
        exporter.includeSched = False
        exporter.includeMedia = True
        # This runs before any card directory is created, so on a first run
        # the deck directory does not exist yet.
        os.makedirs(self.deck_dir, exist_ok=True)
        export_path = os.path.join(self.deck_dir, f'package{exporter.ext}')
        exporter.exportInto(export_path)

    def write_css(self):
        """Write each note type's CSS, scoped under .anki-body, into CSS_DIR.

        The file name is url_encode(<note type name>) so that it matches the
        stylesheet link that write_cards() puts into every page. Note types
        that are not present in the collection are logged and skipped.
        """
        os.makedirs(CSS_DIR, exist_ok=True)
        for note_type in self.NOTE_TYPES:
            model = col.models.byName(note_type)
            if model is None:
                logging.warning('Note type not found, skipping CSS: %s',
                                note_type)
                continue
            sheet = cssutils.parseString(model['css'])
            for rule in sheet:
                if rule.type == cssutils.css.CSSRule.STYLE_RULE:
                    # Prefix every selector of a grouped rule ("a, b"), not
                    # just the first one.
                    rule.selectorText = ', '.join(
                        '.anki-body ' + selector.selectorText
                        for selector in rule.selectorList)
            css = str(sheet.cssText, 'utf-8')
            css_path = os.path.join(CSS_DIR, f'{url_encode(note_type)}.css')
            with open(css_path, 'w', encoding='utf-8') as f:
                f.write(css)

    def write_cards(self):
        """Write one page (plus referenced images) for every card id."""
        for card_id in self.card_ids:
            self._write_card_html(card_id)

    def _write_card_html(self, card_id):
        """Render the card with id *card_id* through RENDER_FORMAT to disk."""
        card = col.getCard(card_id)
        card_title = title(card)
        logging.info('Writing card: %s', card_title)
        os.makedirs(self.card_dir(card), exist_ok=True)
        self.process_media(card)
        rendered = card.render_output()
        html = RENDER_FORMAT.format(
            title=card_title,
            date=creation_date(card),
            lastmod=last_mod_date(card),
            front=rendered.question_text,
            back=rendered.answer_text,
            weight=self.weight(card),
            css=url_encode(card.note_type()["name"]))
        with open(self.card_path(card), 'w', encoding='utf-8') as f:
            f.write(html)

    @classmethod
    def _media_filenames(cls, html):
        """Return the local image file names referenced in *html*.

        Names are returned once each, in order of first appearance. Sources
        that start with one of NON_LOCAL_PREFIXES are left out.
        """
        sources = cls.IMG_SRC_PATTERN.findall(html)
        # dict.fromkeys() de-duplicates while keeping the original order.
        return [src for src in dict.fromkeys(sources)
                if not src.lower().startswith(cls.NON_LOCAL_PREFIXES)]

    def process_media(self, card):
        """Copy the images used by *card* from MEDIA_DIR into its directory.

        A referenced file that does not exist in MEDIA_DIR is logged and
        skipped so that one broken reference does not abort the export.
        """
        rendered = card.render_output()
        html = rendered.question_text + rendered.answer_text
        for filename in self._media_filenames(html):
            src = os.path.join(MEDIA_DIR, filename)
            dst = os.path.join(self.card_dir(card), filename)
            try:
                shutil.copy(src, dst)
            except FileNotFoundError:
                logging.warning('Media file not found, skipping: %s', src)

    @staticmethod
    def weight(card, r=re.compile(r'(\d+)\.(\d+)\.(\d+)')):
        """Return the page weight derived from the note's 'Order' field.

        The field is expected to start with "A.B.C" (three dot-separated
        integers, matched by *r*). Returns 0 when the note has no 'Order'
        field or the field does not start with that pattern.
        """
        note = card.note()
        if 'Order' not in note:
            return 0
        m = r.match(note['Order'])
        if not m:
            return 0
        # Need to convert to a number. Use 100 as base.
        major, minor, patch = (int(part) for part in m.groups())
        return 100**3 * major + 100**2 * minor + 100**1 * patch
if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    # (full Anki deck name, output directory name), exported in this order.
    decks = (
        ('Math and science::Topology', 'topology'),
        ('Math and science::Analysis', 'analysis'),
        ('Math and science::Algebra', 'algebra'),
        ('Math and science::Theory of Computation', 'computation'),
    )
    for deck_name, deck_shortname in decks:
        DeckToSiteExporter(deck_name, deck_shortname).export()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment