-
-
Save kelciour/ef71cee6c4ab70637e89b8b212ddd6cd to your computer and use it in GitHub Desktop.
Improved Quizlet to Anki 2.1 Importer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------------------------------------------------------------- | |
# | |
# Name: Quizlet plugin for Anki 2.0 | |
# Purpose: Import decks from Quizlet into Anki 2.0 | |
# Author: | |
# - Original: (c) Rolph Recto 2012, last updated 12/06/2012 | |
# https://github.com/rolph-recto/Anki-Quizlet | |
# - Also: Contributions from https://ankiweb.net/shared/info/1236400902 | |
# - Current: JDMaybeMD | |
# Created: 04/07/2017 | |
# | |
# Changelog: Initial release
# - Rolph's plugin functionality was broken, so... | |
# - removed search tables and associated functions to KISS | |
# - reused the original API key, dunno if that's OK | |
# - replaced with just one box, for a quizlet URL | |
# - added basic error handling for dummies | |
# | |
# Update 04/09/2017 | |
# - modified to now take a full Quizlet url for ease of use | |
# - provide feedback if trying to download a private deck | |
# - return RFC 2616 response codes when error handling | |
# - don't make a new card type every time a new deck imported | |
# - better code documentation so people can modify it | |
# | |
# Update 01/31/2018 | |
# - get original quality images instead of mobile version | |
# | |
# Changelog (by kelciour):
# Update 09/12/2018 | |
# - updated to Anki 2.1 | |
# | |
# Update 04/02/2020 | |
# - download a set without API key since it's no longer working | |
# | |
# Update 19/02/2020 | |
# - download private or password-protected sets using cookies | |
# | |
# Update 25/02/2020 | |
# - make it work again by adding the User-Agent header | |
# | |
# Update 14/04/2020 | |
# - try to get title from HTML a bit differently | |
# | |
# Update 29/04/2020 | |
# - suggest to disable VPN if a set is blocked by a captcha | |
# | |
# Update 04/05/2020 | |
# - remove Flashcards from the name of the deck | |
# - rename the old note type and create a new Basic Quizlet note type if some fields don't exist
# | |
# Update 17/05/2020 | |
# - use setPageData and assistantModeData as a possible source for flashcards data | |
# | |
# Update 22/07/2020 | |
# - fix for Anki 2.1.28 | |
# | |
# Update 30/08/2020 | |
# - add Return shortcut | |
# | |
# Update 31/08/2020 | |
# - add rich text formatting | |
# | |
# Update 03/08/2020 | |
# - make it working again after Quizlet update | |
#------------------------------------------------------------------------------- | |
#!/usr/bin/env python | |
__window = None | |
import sys, math, time, urllib.parse, json, re, os | |
# Anki | |
from aqt import mw | |
from aqt.qt import * | |
from aqt.utils import showText | |
from anki.utils import checksum | |
import requests | |
import shutil | |
requests.packages.urllib3.disable_warnings() | |
# HTTP headers sent with every request to Quizlet. A browser-like User-Agent
# is required (see the 25/02/2020 changelog entry: "make it work again by
# adding the User-Agent header").
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36"
}

# Stylesheet backing Quizlet's rich-text highlight colours. It is appended to
# the note type's CSS and also written to "_quizlet.css". The literal must
# contain nothing but CSS: any stray characters inside the triple-quoted
# string end up verbatim in the generated stylesheet.
rich_text_css = """
:root {
  --yellow_light_background: #fff4e5;
  --blue_light_background: #cde7fa;
  --pink_light_background: #fde8ff;
}
.nightMode {
  --yellow_light_background: #8c7620;
  --blue_light_background: #295f87;
  --pink_light_background: #7d537f;
}
.bgY {
  background-color: var(--yellow_light_background);
}
.bgB {
  background-color: var(--blue_light_background);
}
.bgP {
  background-color: var(--pink_light_background);
}
"""
# add custom model if needed | |
def addCustomModel(name, col):
    """Return the "Basic Quizlet" note type, creating it when needed.

    If a note type called "Basic Quizlet" already exists and has both a
    "Front" and a "Back" field, it is returned unchanged. If it exists but
    lacks one of those fields, it is renamed with a short checksum suffix
    and saved, and a fresh "Basic Quizlet" note type is built in its place.

    Args:
        name: Unused by this function; kept for the existing call signature.
        col: Collection object exposing a ``models`` manager.

    Returns:
        The existing or newly added note type.
    """
    models = col.models
    model_name = "Basic Quizlet"

    current = models.byName(model_name)
    if current:
        field_names = models.fieldNames(current)
        usable = "Front" in field_names and "Back" in field_names
        if usable:
            return current
        # Move the incompatible note type out of the way under a unique name.
        suffix = checksum(str(time.time()))[:5]
        current['name'] = current['name'] + "-" + suffix
        models.save(current)

    model = models.new(model_name)

    # fields
    for field_name in ("Front", "Back", "Add Reverse"):
        models.addField(model, models.newField(field_name))

    # card templates: (template name, question format, answer format)
    card_templates = (
        (
            "Normal",
            "{{Front}}",
            "{{FrontSide}}\n\n<hr id=answer>\n\n{{Back}}",
        ),
        (
            # Only generated when the "Add Reverse" field is non-empty.
            "Reverse",
            "{{#Add Reverse}}{{Back}}{{/Add Reverse}}",
            "{{FrontSide}}\n\n<hr id=answer>\n\n{{Front}}",
        ),
    )
    for template_name, question, answer in card_templates:
        template = models.newTemplate(template_name)
        template['qfmt'] = question
        template['afmt'] = answer
        models.addTemplate(model, template)

    models.add(model)
    return model
# throw up a window with some info (used for testing) | |
def debug(message):
    """Pop up an information box titled "Message" showing *message* (used for testing)."""
    parent = QWidget()
    QMessageBox.information(parent, "Message", message)
class QuizletWindow(QWidget):
    """Main window of the Quizlet importer.

    Shows a single URL field and an "Import Deck" button. Pressing the
    button downloads the set with a QuizletDownloader thread and turns the
    result into an Anki deck of "Basic Quizlet" notes.
    """

    # used to access Quizlet API; within this class it is only referenced by
    # the retired (commented-out) API URL construction
    __APIKEY = "ke9tZw8YM6"

    # main window of Quizlet plugin
    def __init__(self):
        super(QuizletWindow, self).__init__()
        self.results = None
        self.thread = None
        self.initGUI()

    # create GUI skeleton
    def initGUI(self):
        """Build the widgets and layouts, then show the window."""
        self.box_top = QVBoxLayout()
        self.box_upper = QHBoxLayout()

        # left side
        self.box_left = QVBoxLayout()

        # quizlet url field
        self.box_name = QHBoxLayout()
        self.label_url = QLabel("Quizlet URL:")
        self.text_url = QLineEdit("", self)
        self.text_url.setMinimumWidth(300)
        self.box_name.addWidget(self.label_url)
        self.box_name.addWidget(self.text_url)

        # add layouts to left
        self.box_left.addLayout(self.box_name)

        # right side
        self.box_right = QVBoxLayout()

        # code (import set) button
        self.box_code = QHBoxLayout()
        self.button_code = QPushButton("Import Deck", self)
        self.button_code.setShortcut(QKeySequence("Return"))
        self.box_code.addStretch(1)
        self.box_code.addWidget(self.button_code)
        self.button_code.clicked.connect(self.onCode)

        # add layouts to right
        self.box_right.addLayout(self.box_code)

        # add left and right layouts to upper
        self.box_upper.addLayout(self.box_left)
        self.box_upper.addSpacing(20)
        self.box_upper.addLayout(self.box_right)

        # results label
        self.label_results = QLabel("\r\n<i>Example: https://quizlet.com/150875612/usmle-flash-cards/</i>")

        # add all widgets to top layout
        self.box_top.addLayout(self.box_upper)
        self.box_top.addWidget(self.label_results)
        self.box_top.addStretch(1)
        self.setLayout(self.box_top)

        # go, baby go!
        self.setMinimumWidth(500)
        self.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Minimum)
        self.setWindowTitle("Improved Quizlet to Anki Importer")
        self.show()

    def onCode(self):
        """Validate the entered URL and, if it looks like a Quizlet set, import it.

        Validation problems are reported through ``self.label_results`` and
        end the call early.
        """
        # Pasted URLs often carry stray whitespace, which would otherwise
        # defeat the scheme/domain detection below.
        url = self.text_url.text().strip()
        if url == "":
            self.label_results.setText("Oops! You forgot the deck URL :(")
            return

        # urlparse only fills in netloc when a scheme is present, so add one
        # for inputs such as "quizlet.com/123/name".
        parsed = urllib.parse.urlparse(url)
        if not parsed.scheme:
            parsed = urllib.parse.urlparse("https://" + url)

        # validate quizlet URL
        if "quizlet.com" not in parsed.netloc:
            self.label_results.setText("Oops! That's not a Quizlet URL :(")
            return

        # validate and set Quizlet deck ID
        urlPath = parsed.path.strip("/")
        if urlPath == "":
            self.label_results.setText("Oops! Please use the full deck URL :(")
            return

        # the deck ID is the first run of digits in the URL path
        id_match = re.search(r"\d+", urlPath)
        if id_match is None:
            self.label_results.setText("Oops! No deck ID found in path <i>{0}</i> :(".format(urlPath))
            return
        quizletDeckID = id_match.group(0)

        # The wait loop in _runImport pumps the Qt event loop, so without
        # this guard a second click (or Return) would re-enter onCode while
        # an import is still running.
        self.button_code.setEnabled(False)
        try:
            self._runImport(quizletDeckID)
        finally:
            self.button_code.setEnabled(True)

    def _runImport(self, quizletDeckID):
        """Download the set with the given ID and report the outcome in the results label."""
        # and aaawaaaay we go...
        self.label_results.setText("Connecting to Quizlet...")
        deck_url = "https://quizlet.com/{}/flashcards".format(quizletDeckID)

        # download the data! A local reference is used for the wait loop so
        # that it never depends on self.thread staying unchanged.
        downloader = QuizletDownloader(self, deck_url)
        self.thread = downloader
        downloader.start()
        while not downloader.isFinished():
            # keep the UI responsive while the thread works
            mw.app.processEvents()
            downloader.wait(50)

        if downloader.error:
            # error fetching data
            if downloader.errorCode == 403:
                if downloader.errorCaptcha:
                    self.label_results.setText("Sorry, it's behind a captcha. Try to disable VPN")
                else:
                    self.label_results.setText("Sorry, this is a private deck :(")
            elif downloader.errorCode == 404:
                self.label_results.setText("Can't find a deck with the ID <i>{0}</i>".format(quizletDeckID))
            else:
                self.label_results.setText("Unknown Error")
                showText(downloader.errorMessage)
        else:
            # everything went through, let's roll!
            deck = downloader.results
            self.label_results.setText(("Importing deck {0}...".format(deck["title"])))
            self.createDeck(deck)
            self.label_results.setText(("Success! Imported <b>{0}</b> ({1} cards)".format(deck["title"], deck["term_count"])))

        self.thread = None

    @staticmethod
    def _collectTerms(result):
        """Normalise the downloaded data into a list of term dicts.

        Three layouts are recognised, keyed by "termIdToTermsMap",
        "studiableData", or (fallback) "terms". Dicts built here carry
        'word', 'definition', '_imageUrl', 'wordRichText' and
        'definitionRichText'; the fallback list is returned as downloaded.
        """
        if "termIdToTermsMap" in result:
            ranked = sorted(result['termIdToTermsMap'].values(), key=lambda v: v["rank"])
            return [
                {
                    'word': c['word'],
                    'definition': c['definition'],
                    '_imageUrl': c.get("_imageUrl") or '',
                    'wordRichText': c.get('wordRichText'),
                    'definitionRichText': c.get('definitionRichText'),
                }
                for c in ranked
            ]

        if "studiableData" in result:
            data = result["studiableData"]

            # media (text / image) grouped by the card side it is attached to
            media = {}
            for conn in data["studiableMediaConnections"]:
                entry = media.setdefault(conn["connectionModelId"], {})
                # "plainText", "languageCode", "ttsUrl", "ttsSlowUrl", "richText"
                entry.update(conn.get("text") or {})
                if "image" in conn:
                    entry["_imageUrl"] = conn["image"]["url"]

            terms = {item["id"]: {} for item in data["studiableItems"]}
            for side in data["studiableCardSides"]:
                term = terms[side["studiableItemId"]]
                # a side may have an image but no text (or no media at all)
                side_media = media.get(side["id"], {})
                label = side["label"]
                term[label] = side_media.get("plainText", "")
                term["{}RichText".format(label)] = side_media.get("richText")
                # keep an image found on an earlier side when this side has none
                term["_imageUrl"] = side_media.get("_imageUrl") or term.get("_imageUrl", "")
            return list(terms.values())

        return result['terms']

    @staticmethod
    def _richTextToHtml(node, fallback=''):
        """Render a rich-text node tree as HTML.

        Returns *fallback* when *node* is missing or empty (None, '' or {}).
        Text nodes are wrapped in <b>/<i>/<u> for marks of those types and in
        a <span> carrying the mark's attrs; 'paragraph' nodes become <div>s.
        """
        if not node:
            return fallback
        if node['type'] == 'text':
            html = node['text']
            for mark in node.get('marks') or ():
                if mark['type'] in ('b', 'i', 'u'):
                    html = '<{0}>{1}</{0}>'.format(mark['type'], html)
                if 'attrs' in mark:
                    attrs = " ".join('{}="{}"'.format(k, v) for k, v in mark['attrs'].items())
                    html = '<span {}>{}</span>'.format(attrs, html)
            return html
        # container node; empty containers may come without a 'content' key
        children = node.get('content') or ()
        html = ''.join(QuizletWindow._richTextToHtml(child) for child in children)
        if node['type'] == 'paragraph':
            html = '<div>{}</div>'.format(html)
        return html

    @staticmethod
    def _ankify(text):
        """Convert plain text to HTML: newlines become <br>, *x* becomes <b>x</b>."""
        text = text.replace('\n', '<br>')
        text = re.sub(r'\*(.+?)\*', r'<b>\1</b>', text)
        return text

    def createDeck(self, result):
        """Create a deck from the downloaded data and add one note per term.

        Also stores the number of terms in ``result['term_count']``, which
        the caller reads for its success message.
        """
        config = mw.addonManager.getConfig(__name__)
        rich_text = config["rich_text_formatting"]

        # deck title, depending on the layout of the downloaded data
        if "set" in result:
            name = result['set']['title']
        elif "studyable" in result:
            name = result['studyable']['title']
        else:
            name = result['title']

        terms = self._collectTerms(result)
        result['term_count'] = len(terms)

        # create new deck and custom model
        deck = mw.col.decks.get(mw.col.decks.id(name))
        model = addCustomModel(name, mw.col)
        if rich_text and ".bgY" not in model["css"]:
            model["css"] += rich_text_css

        # assign custom model to new deck
        mw.col.decks.select(deck["id"])
        mw.col.decks.save(deck)

        # assign new deck to custom model
        mw.col.models.setCurrent(model)
        model["did"] = deck["id"]
        mw.col.models.save(model)

        # "photo" is a comma-separated token list whose first token selects
        # the URL pattern; the whole token list is passed to format().
        photo_urls = {
            "1": "https://farm{1}.staticflickr.com/{2}/{3}_{4}.jpg",
            "2": "https://o.quizlet.com/i/{1}.jpg",
            "3": "https://o.quizlet.com/{1}.{2}"
        }

        for term in terms:
            note = mw.col.newNote()
            note["Front"] = self._ankify(term.get('word') or '')
            note["Back"] = self._ankify(term.get('definition') or '')
            if rich_text:
                # fall back to the plain-text version when no rich text exists
                note["Front"] = self._richTextToHtml(term.get('wordRichText'), note["Front"])
                note["Back"] = self._richTextToHtml(term.get('definitionRichText'), note["Back"])
            if term.get("photo"):
                img_tkns = term["photo"].split(',')
                img_type = img_tkns[0]
                term["_imageUrl"] = photo_urls[img_type].format(*img_tkns)
            if term.get("_imageUrl"):
                file_name = self.fileDownloader(term["_imageUrl"])
                if note["Back"]:
                    note["Back"] += "<div><br></div>"
                note["Back"] += '<div><img src="{0}"></div>'.format(file_name)
                # downloads are synchronous; let the UI repaint in between
                mw.app.processEvents()
            if rich_text:
                note["Front"] = '<link rel="stylesheet" href="_quizlet.css">' + note["Front"]
            mw.col.addNote(note)

        mw.col.reset()
        mw.reset()

    # download the images
    def fileDownloader(self, url):
        """Download the image at *url* and return the local file name.

        The file name is returned even when the download does not succeed
        (non-200 status); in that case no file is written.
        """
        # get original, non-mobile version of images: drop the "_m" size
        # suffix at the end of the file name only, not every "_m" that
        # happens to occur elsewhere in the URL.
        url = re.sub(r'_m(?=(?:\.\w+)?(?:\?|$))', '', url)
        file_name = "quizlet-" + url.split('/')[-1]
        # NOTE(review): verify=False disables TLS certificate verification.
        # NOTE(review): the file is written relative to the current working
        # directory, presumably Anki's media folder; confirm.
        r = requests.get(url, stream=True, verify=False, headers=headers)
        try:
            if r.status_code == 200:
                with open(file_name, 'wb') as f:
                    r.raw.decode_content = True
                    shutil.copyfileobj(r.raw, f)
        finally:
            # a streamed response keeps its connection until closed
            r.close()
        return file_name
class QuizletDownloader(QThread):
    """Thread that downloads a Quizlet set page and extracts its data.

    After the thread has finished, callers inspect:
      - ``error``: True when anything went wrong
      - ``errorCode``: HTTP status code, when one is known
      - ``errorCaptcha``: True when the error response carried a
        "CF-Chl-Bypass" header
      - ``errorMessage``: text describing the failure, when available
      - ``results``: the parsed page data with an added 'title' key
    """

    # Names of the window.Quizlet[...] page variables that may hold the set
    # data, tried in this order.
    _DATA_KEYS = ("setPageData", "assistantModeData", "cardsModeData")

    def __init__(self, window, url):
        super(QuizletDownloader, self).__init__()
        self.window = window
        self.url = url
        self.results = None
        self.error = False
        self.errorCode = None
        self.errorCaptcha = False
        self.errorReason = None
        self.errorMessage = None

    def run(self):
        """Fetch ``self.url`` and populate ``results`` or the error attributes."""
        r = None
        try:
            config = mw.addonManager.getConfig(__name__)

            # make sure the stylesheet referenced by rich-text notes exists
            if config["rich_text_formatting"] and not os.path.exists("_quizlet.css"):
                with open("_quizlet.css", "w") as f:
                    f.write(rich_text_css.lstrip())

            # cookies give access to private or password-protected sets
            cookies = {}
            if config["qlts"]:
                cookies = {"qlts": config["qlts"]}
            elif config["cookies"]:
                from http.cookies import SimpleCookie
                jar = SimpleCookie()
                jar.load(config["cookies"])
                cookies = {key: morsel.value for key, morsel in jar.items()}

            # NOTE(review): verify=False disables TLS certificate verification.
            r = requests.get(self.url, verify=False, headers=headers, cookies=cookies)
            r.raise_for_status()

            # a password prompt page is reported like a private deck (403)
            if re.search(re.escape('window.Quizlet["setPasswordData"]'), r.text):
                self.error = True
                self.errorCode = 403
                return

            # the set data is embedded in the page as
            #   window.Quizlet["<key>"] = <json>; QLoad("Quizlet.<key>");
            m = None
            for key in self._DATA_KEYS:
                regex = re.escape('window.Quizlet["{}"] = '.format(key))
                regex += r'(.+?)'
                regex += re.escape('; QLoad("Quizlet.{}");'.format(key))
                m = re.search(regex, r.text)
                if m:
                    break
            if not m:
                # handled by the generic handler below, which appends the
                # page text to the message
                raise LookupError("Unable to find the flashcards data on the page")

            data = m.group(1).strip()
            self.results = json.loads(data)

            # prefer the page <title>, minus Quizlet's decorations
            title = os.path.basename(self.url.strip()) or "Quizlet Flashcards"
            m = re.search(r'<title>(.+?)</title>', r.text)
            if m:
                title = m.group(1)
                title = re.sub(r' \| Quizlet$', '', title)
                title = re.sub(r'^Flashcards ', '', title)
                title = re.sub(r'\s+', ' ', title)
                title = title.strip()
            self.results['title'] = title
        except requests.HTTPError as e:
            self.error = True
            self.errorCode = e.response.status_code
            self.errorMessage = e.response.text
            if "CF-Chl-Bypass" in e.response.headers:
                self.errorCaptcha = True
        except ValueError as e:
            self.error = True
            self.errorMessage = "Invalid json: {0}".format(e)
        except Exception as e:
            self.error = True
            # r is still None when the failure happened before or during the
            # request itself (e.g. no connection, missing config key)
            page_text = r.text if r is not None else ""
            self.errorMessage = "{}\n-----------------\n{}".format(e, page_text)
# yep, we got it | |
# plugin was called from Anki | |
def runQuizletPlugin():
    """Open the importer window (plugin was called from Anki)."""
    global __window
    # The window is stored in a module-level name; presumably this keeps the
    # widget referenced after this function returns.
    window = QuizletWindow()
    __window = window
# create menu item in Anki | |
# "Import from Quizlet" entry in Anki's Tools menu; triggering it opens the
# importer window.
action = QAction("Import from Quizlet", mw)
action.triggered.connect(runQuizletPlugin)
mw.form.menuTools.addAction(action)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment