Skip to content

Instantly share code, notes, and snippets.

@kevindoran
Last active June 8, 2021 16:23
Show Gist options
  • Save kevindoran/29bfc50d0ad8cc83fae82869a60e4484 to your computer and use it in GitHub Desktop.
Save kevindoran/29bfc50d0ad8cc83fae82869a60e4484 to your computer and use it in GitHub Desktop.
Export Anki decks as webpages (targeting Hugo)
import anki
import anki.collection
import anki.exporting
import datetime
import os
import re
import cssutils
import shutil
import logging
# Need to use absolute paths as the Anki exporter calls
# os.chdir() at various points.
# Anki collection database that is opened below.
COLLECTION_PATH = "/www/anki_data/collection.anki2"
# Directory the image files referenced by cards are copied from.
MEDIA_DIR = "/www/anki_data/collection.media"
# Root of the generated content; each deck gets its own subdirectory here.
OUT_DIR = "/www/src/content/anki/"
# Directory the per-note-type stylesheets are written to.
CSS_DIR = "/www/src/themes/blank/static/css"
# NOTE(review): not referenced anywhere in the visible code -- confirm
# whether this is still needed.
FILENAME_FORMAT = "{card_id}.html"
# Template for one card page: front matter between the '---' lines
# (title, date, lastmod, weight), a link to the note type's stylesheet,
# then the front of the card (visible) and the back of the card (hidden via
# display:none). Filled in with str.format(), so the date fields accept
# datetime objects.
RENDER_FORMAT = """
---
title: "{title}"
date: {date:%Y-%m-%d}
lastmod: {lastmod:%Y-%m-%d}
draft: false
type: ankimath
weight: {weight}
---
<link rel="stylesheet" href="/css/{css}.css">
<div class="anki-front" style="display:block;">
{front}
</div>
<div class="anki-back" style="display:none;">
{back}
</div>
"""
# Remember the working directory so it can be restored once the collection
# has been opened.
cwd = os.getcwd()
# Shared collection handle used by every function and class in this module.
col = anki.Collection(COLLECTION_PATH)
# Why does Anki change directory?
# Opening the collection apparently changes the working directory, so
# put it back.
os.chdir(cwd)
def title(card):
    """Return the card's title, ready to embed in a double-quoted string.

    The title is read from the note's 'Title' field; notes without that
    field get the placeholder "(No title)".
    """
    fields = card.note()
    raw_title = fields['Title'] if 'Title' in fields else "(No title)"
    # HTML tags sneak into titles, and Hugo does not render them when the
    # title comes from page metadata, so strip them.
    without_tags = re.sub('<[^<]+?>', '', raw_title)
    # Backslash-escape backslashes and double quotes so the result is safe
    # inside the quoted front-matter value.
    return re.sub(r'([\\"])', r'\\\1', without_tags)
# Currently doesn't need to be any more complicated.
def url_encode(s):
    """Return a URL-friendly slug for *s*.

    The text is lower-cased and every space or underscore becomes a
    hyphen. No other characters are altered.
    """
    hyphenate = str.maketrans({' ': '-', '_': '-'})
    lowered = s.lower()
    return lowered.translate(hyphenate)
def creation_date(card):
    """Return the card's creation time as a naive local datetime.

    The card id is treated as a timestamp in milliseconds, so it is
    scaled down to seconds before conversion.
    """
    seconds = card.id / 1000
    return datetime.datetime.fromtimestamp(seconds)
def last_mod_date(card):
    """Return the modification time of the card's note as a naive local datetime."""
    # Not sure whether to use a card or note mod date.
    note = card.note()
    return datetime.datetime.fromtimestamp(note.mod)
class DeckToSiteExporter:
    """Export a single Anki deck as Hugo content.

    For the deck named ``deck_name`` the exporter writes, below
    ``OUT_DIR/<deck_shortname>``:

    * an Anki package of the deck (``package<ext>``), and
    * one ``<card id>/index.html`` page per non-suspended card, with the
      images that card references copied next to it.

    It also writes one stylesheet per entry of ``NOTE_TYPES`` into
    ``CSS_DIR``. All collection access goes through the module-level
    ``col`` object.
    """

    # Generalize this later if needed.
    NOTE_TYPES = ['Math Cloze', 'Idea_description_example',
                  'Cloze', 'Image Occlusion Enhanced', 'Basic_with_title',
                  'Basic', 'Math basic', 'Reversed (general)',
                  'Title only reverse']

    # Captures the double-quoted src of every <img> tag, wherever the
    # attribute sits inside the tag. The lazy [^>]*? keeps each match inside
    # one tag, so several images on the same line are all found.
    _IMG_SRC_RE = re.compile(r'<img\b[^>]*?\ssrc="([^"]+)"')

    def __init__(self, deck_name, deck_shortname):
        """Remember the deck to export and where its output goes.

        Args:
            deck_name: Full Anki deck name, e.g. 'Parent::Child'.
            deck_shortname: Name of the subdirectory of OUT_DIR that
                receives this deck's files.
        """
        self.deck_dir = os.path.join(OUT_DIR, deck_shortname)
        self.deck_name = deck_name
        # Both are filled in by export().
        self.deck_id = None
        self.card_ids = None

    def export(self):
        """Write the package, the stylesheets and all card pages.

        Raises:
            ValueError: If the collection has no deck named ``deck_name``.
        """
        self.deck_id = col.decks.id_for_name(self.deck_name)
        if self.deck_id is None:
            # Without a deck id the package export would not be restricted
            # to this deck, so fail loudly on a misspelled deck name.
            raise ValueError(
                f'No deck named {self.deck_name!r} in the collection')
        # Maybe filter by tags here like 'copyright'.
        self.card_ids = col.find_cards(f'deck:"{self.deck_name}" -is:suspended')
        self.write_package()
        self.write_css()
        self.write_cards()

    def card_dir(self, card):
        """Return the output directory for *card*: ``<deck_dir>/<card id>``."""
        return os.path.join(self.deck_dir, str(card.id))

    def card_path(self, card):
        """Return the path of the page written for *card*."""
        return os.path.join(self.card_dir(card), 'index.html')

    def write_package(self):
        """Export the deck as an Anki package into the deck directory."""
        exporter = anki.exporting.AnkiPackageExporter(col)
        exporter.did = self.deck_id
        # Restricting the export to self.card_ids (exporter.cids) is
        # intentionally left disabled.
        exporter.includeHTML = True
        exporter.includeTags = False
        exporter.includeSched = False
        exporter.includeMedia = True
        # This runs before any card directory is created, so the deck
        # directory may not exist yet.
        os.makedirs(self.deck_dir, exist_ok=True)
        export_path = os.path.join(self.deck_dir, f'package{exporter.ext}')
        exporter.exportInto(export_path)

    def write_css(self):
        """Write one scoped stylesheet per note type into CSS_DIR.

        Every style rule is prefixed with ``.anki-body`` so the note type's
        CSS only applies inside the card container. Note types that are
        missing from the collection are skipped with a warning.
        """
        os.makedirs(CSS_DIR, exist_ok=True)
        for note_type in self.NOTE_TYPES:
            model = col.models.byName(note_type)
            if model is None:
                logging.warning(
                    'Note type %r not found; skipping its CSS', note_type)
                continue
            sheet = cssutils.parseString(model['css'])
            for rule in sheet:
                if rule.type == cssutils.css.CSSRule.STYLE_RULE:
                    # Prefix every selector of a comma-separated group, not
                    # just the first one.
                    rule.selectorText = ", ".join(
                        ".anki-body " + selector.selectorText
                        for selector in rule.selectorList)
            css = str(sheet.cssText, 'utf-8')
            # Must match the name used in the page's <link>, which is
            # built with url_encode() in _write_card_html().
            note_file_name = url_encode(note_type)
            css_path = os.path.join(CSS_DIR, f'{note_file_name}.css')
            with open(css_path, 'w', encoding='utf-8') as f:
                f.write(css)

    def write_cards(self):
        """Write a page (and its media) for every card found by export()."""
        for card_id in self.card_ids:
            self._write_card_html(card_id)

    def _write_card_html(self, card_id):
        """Render the card with id *card_id* into its index.html page."""
        card = col.getCard(card_id)
        card_title = title(card)
        logging.info('Writing card: %s', card_title)
        os.makedirs(self.card_dir(card), exist_ok=True)
        self.process_media(card)
        rendered = card.render_output()
        # Build the whole page first so a rendering error does not leave an
        # empty file behind.
        html = RENDER_FORMAT.format(
            title=card_title,
            date=creation_date(card),
            lastmod=last_mod_date(card),
            front=rendered.question_text,
            back=rendered.answer_text,
            weight=self.weight(card),
            css=url_encode(card.note_type()["name"]))
        with open(self.card_path(card), 'w', encoding='utf-8') as f:
            f.write(html)

    @classmethod
    def _media_filenames(cls, html):
        """Return the distinct <img> src values in *html*, in first-seen order."""
        return list(dict.fromkeys(cls._IMG_SRC_RE.findall(html)))

    def process_media(self, card):
        """Copy every image referenced by *card* into the card's directory.

        Sources that are not files in MEDIA_DIR (for example remote URLs)
        are skipped with a warning instead of aborting the export.
        """
        rendered = card.render_output()
        html = rendered.question_text + rendered.answer_text
        for filename in self._media_filenames(html):
            src = os.path.join(MEDIA_DIR, filename)
            if not os.path.isfile(src):
                logging.warning('Media file not found, skipping: %s', src)
                continue
            dst = os.path.join(self.card_dir(card), filename)
            shutil.copy(src, dst)

    @staticmethod
    def weight(card, r=re.compile(r'(\d+)\.(\d+)\.(\d+)')):
        """Return the page weight derived from the note's 'Order' field.

        The field is expected to start with 'A.B.C' (three integers). Notes
        without the field, or whose value does not match, get weight 0.

        Args:
            card: Card whose note is inspected.
            r: Compiled pattern with three numeric groups.
        """
        note = card.note()
        if 'Order' not in note:
            return 0
        m = r.match(note['Order'])
        if not m:
            return 0
        # Need to convert to a number. Use 100 as base.
        return (100**3 * int(m.group(1))
                + 100**2 * int(m.group(2))
                + 100**1 * int(m.group(3)))
if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    # (Anki deck name, output subdirectory) pairs, exported in this order.
    decks_to_export = (
        ('Math and science::Topology', 'topology'),
        ('Math and science::Analysis', 'analysis'),
        ('Math and science::Algebra', 'algebra'),
        ('Math and science::Theory of Computation', 'computation'),
    )
    for deck_name, deck_shortname in decks_to_export:
        DeckToSiteExporter(deck_name, deck_shortname).export()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment