Skip to content

Instantly share code, notes, and snippets.

@okomestudio
Created July 20, 2020 23:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save okomestudio/ee4f83938bd7ded092b84c5a65d4159c to your computer and use it in GitHub Desktop.
Save okomestudio/ee4f83938bd7ded092b84c5a65d4159c to your computer and use it in GitHub Desktop.
Convert the CS flash cards by jwasham to CSV for Anki import.
#!/usr/bin/env python
"""Convert the CS flash cards by jwasham to CSV for Anki import.
On import:
- Choose tab as the field separator
- Ignore lines where first field matches existing note
- Allow HTML in fields
Some tags are added (c, c++, python, code, and vocab) to distinguish the type of cards
and their content. Try importing into a separate deck in order to avoid contaminating
existing ones.
For the source data, see: https://github.com/jwasham/computer-science-flash-cards
"""
import csv
import html
import re
import sqlite3
import sys
from argparse import ArgumentParser
from contextlib import contextmanager
from enum import IntEnum
@contextmanager
def fopen(filename=None, mode="r"):
    """Open *filename* for use in a ``with`` block, falling back to stdout.

    Args:
        filename: Path of the file to open. ``None`` or ``"-"`` selects
            ``sys.stdout`` instead of a file on disk.
        mode: Mode passed to :func:`open`. It is ignored when standard
            output is selected.

    Yields:
        The opened file object, or ``sys.stdout``.
    """
    if filename is None or filename == "-":
        stream = sys.stdout
        try:
            yield stream
        finally:
            # Standard output belongs to the interpreter, not to us: closing
            # it would break every later print()/write in the process. Flush
            # so that buffered output is not lost, and leave it open.
            stream.flush()
    else:
        # Regular files are owned by this context manager and closed on exit.
        with open(filename, mode) as f:
            yield f
class Type(IntEnum):
    """Card categories, compared against the ``type`` column of ``cards``."""

    # Tagged "vocab" on export.
    VOCAB = 1
    # Tagged "code" on export; the answer is wrapped in <pre>.
    CODE = 2
def convert(input, output):
    """Read all cards from a SQLite database and write them out as CSV.

    Args:
        input: Path of the SQLite database file; it must contain a ``cards``
            table with ``type``, ``front`` and ``back`` columns.
        output: Destination passed through to ``write_to_csv``.

    Raises:
        sqlite3.Error: If the database cannot be read or the query fails.
    """
    conn = sqlite3.connect(input)
    try:
        conn.text_factory = str
        cursor = conn.cursor()
        cursor.execute("SELECT type, front, back FROM cards")
        items = cursor.fetchall()
    finally:
        # Release the database handle even when the query fails; all rows
        # are already in memory, so the connection is not needed for writing.
        conn.close()
    write_to_csv(output, items)
def infer_coding_language(front, back, tags):
    """Heuristically infer the coding language used.

    The question text is searched for language names and the answer text for
    language-specific syntax. Matching tags are added to *tags* in place.

    Args:
        front: Question side of the card.
        back: Answer side of the card.
        tags: Mutable set of tag strings; updated in place.

    Returns:
        None.
    """
    if re.search(r"\bC\+\+(?!\w)", front, re.IGNORECASE):
        tags.add("c++")
    if re.search(r"\bPython\b", front, re.IGNORECASE):
        tags.add("python")
    # The lookahead keeps the "C" of "C++" / "C#" from being counted as plain
    # C; without it every C++ question would also be tagged "c". A standalone
    # "C" elsewhere in the same text (e.g. "C/C++") still matches.
    if re.search(r"\bC\b(?!\+\+|#)", front, re.IGNORECASE):
        tags.add("c")
    # A ``def name`` in the answer is taken as a sign of Python code.
    if re.search(r"\bdef [a-zA-Z]+", back):
        tags.add("python")
    # A ``void name`` / ``int name`` declaration is taken as a sign of C code.
    if re.search(r"\b(void|int) [a-zA-Z]+", back):
        tags.add("c")
def write_to_csv(output, items, tags=None):
    """Write cards as tab-separated rows of ``front``, ``back`` and tags.

    Args:
        output: Destination passed to ``fopen``: a file path, or ``None`` /
            ``"-"`` for standard output.
        items: Iterable of ``(type, front, back)`` tuples.
        tags: Optional iterable of tags applied to every card. When given, a
            leading ``tags: ...`` row is written as well.
    """
    # Copy into a set so that any iterable is accepted and the caller's
    # collection is never mutated.
    common_tags = set(tags) if tags else set()
    with fopen(output, "w") as f:
        writer = csv.writer(f, delimiter="\t", lineterminator="\n")
        if common_tags:
            writer.writerow([f"tags: {' '.join(sorted(common_tags))}"])
        for item_type, front, back in items:
            card_tags = set(common_tags)
            infer_coding_language(front, back, card_tags)
            if item_type == Type.CODE:
                card_tags.add("code")
                # The field is imported as HTML, so raw "<", ">" and "&" in
                # code (e.g. "#include <stdio.h>") must be escaped or they
                # are swallowed as markup.
                back = f"<pre>{html.escape(back, quote=False)}</pre>"
            elif item_type == Type.VOCAB:
                card_tags.add("vocab")
                # Convert both CRLF and bare LF line breaks to HTML breaks.
                back = back.replace("\r\n", "<br>").replace("\n", "<br>")
            # Sets have no stable iteration order; sort so that repeated runs
            # produce identical output.
            writer.writerow([front, back, " ".join(sorted(card_tags))])
if __name__ == "__main__":
    # Command-line entry point: one positional database path and an optional
    # output path; without --output, None is passed on to convert().
    cli = ArgumentParser()
    cli.add_argument("input", help="SQLite database file")
    cli.add_argument("--output", "-o", help="Output CSV file", default=None)
    options = cli.parse_args()
    convert(options.input, options.output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment