Last active
November 24, 2025 12:39
-
-
Save Koorosh14/2ff121652407f7d5ceb170948a5d5ba0 to your computer and use it in GitHub Desktop.
Wiktionary to Anki Card Generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * Wiktionary to Anki Card Generator | |
| * https://gist.github.com/Koorosh14/2ff121652407f7d5ceb170948a5d5ba0 | |
| * | |
| * This script generates Anki cards from Wiktionary pages for verbs, nouns, adjectives, etc. | |
| * | |
| * How to use: | |
| * 1. Open a Wiktionary page. | |
| * 2. Copy the content of this script to your browser's console. | |
| * (You can also use browser extensions like "Custom Style Script".) | |
| * 3. Use the "Generate Front (EN-DE)" and "Generate Back (EN-DE)" buttons to create the front and back content and paste them to Anki. | |
| */ | |
// Element wrapping the word-class section whose button was clicked (set by initializeVariables)
let mainWrapper = null;

// Word-class flags of the current section.
// Each flag gets its own declaration: a chained `let a = b = c = false` only declares `a`
// and turns the others into implicit globals (a ReferenceError in strict mode / modules).
let isVerb = false;
let isNoun = false;
let isAdjective = false;
let isAdverb = false;
let isWortverbindung = false;

// Inflection table of the current section (null when not looked up yet or not found)
let theTable = null;

// Heading paragraphs found in the current section. An empty string means "not found".
// The key order is the order in which the sections are written to the back of the card.
let titles = {
	'nichtmehrgultigeschreibweisen': '',
	'anmerkung': '',
	'alternativeschreibweisen': '',
	'bedeutungen': '',
	'abkurzungen': '',
	'herkunft': '',
	'sinnverwandteWorter': '',
	'synonyme': '',
	'gegenworter': '',
	'oberbegriffe': '',
	'unterbegriffe': '',
	'beispiele': '',
	'redewendungen': '',
	'sprichworter': '',
	'charakteristischeWortkombinationen': '',
	'wortbildungen': '',
	'ahnlicheWorter': '',
	'entlehnungen': '',
};

// English translation text of the current section (filled by initializeVariables)
let translation = null;

// "DOMContentLoaded" fires only once. When the script is pasted into the console of an
// already loaded page the event is long gone, so add the buttons right away in that case.
if (document.readyState === 'loading')
	document.addEventListener('DOMContentLoaded', addButtons);
else
	addButtons();
/**
 * Places a "Generate Front" and a "Generate Back" button at the top of every
 * section that sits directly inside the main section (data-mw-section-id="1").
 *
 * @returns {void}
 */
function addButtons()
{
	// Creates one floating button, puts it at the top of the section and wires up its click handler
	const attachButton = (section, label, rightOffset, onClick) => {
		const button = document.createElement('button');
		button.innerText = label;
		Object.assign(button.style, {
			position: 'relative',
			top: '30px',
			right: rightOffset,
			width: '100px',
			height: '50px',
			float: 'right',
		});
		section.prepend(button);
		button.addEventListener('click', onClick);
	};

	for (const section of document.querySelectorAll('[data-mw-section-id]'))
	{
		const sectionId = section.getAttribute('data-mw-section-id');

		// The empty section above the page ("0") and the main section ("1") get no buttons
		if (sectionId === '0' || sectionId === '1') continue;

		// Neither do sections that are not direct children of the main section
		if (section.parentElement.getAttribute('data-mw-section-id') !== '1') continue;

		// The back button is prepended last, so it ends up in front of the front button in the DOM
		attachButton(section, 'Generate Front (EN-DE)', '20px', (event) => generateFront(event.target));
		attachButton(section, 'Generate Back (EN-DE)', '10px', (event) => generateBack(event.target));
	}
}
/**
 * Builds the front side of the card (the English translation followed by an
 * example placeholder) and copies it to the clipboard.
 *
 * @param {Element} buttonElement - The button element that triggered the action.
 *
 * @returns {void}
 */
function generateFront(buttonElement)
{
	initializeVariables(buttonElement);

	const isSupportedWordClass = isVerb || isNoun || isAdjective || isAdverb || isWortverbindung;
	if (isSupportedWordClass)
	{
		copyToClipboard(`${translation.trim()} <br><br>[Your Example Here]`);
		return;
	}

	// None of the known word classes was detected in the section of the clicked button
	alert('This script only works on verb, noun, adjective, adverb, or word connection pages.');
}
/**
 * Generates the back content for the card and copies it to the clipboard.
 *
 * The content consists of (in this order): the page title (not for nouns and adjectives),
 * the inflection table content (not for adverbs and word connections), an example
 * placeholder, and every heading section that was found on the page.
 *
 * @param {Element} buttonElement - The button element that triggered the action.
 *
 * @returns {void}
 */
function generateBack(buttonElement)
{
	initializeVariables(buttonElement);

	if (!isVerb && !isNoun && !isAdjective && !isAdverb && !isWortverbindung)
	{
		alert('This script only works on verb, noun, adjective, adverb, or word connection pages.');
		return;
	}

	const LINE_BREAK = ' <br>';
	let backContent = '';

	// Get the main title and remove the " (Deutsch)" part (for non-noun, non-adjective words)
	if (!isNoun && !isAdjective)
	{
		const pageTitle = document.querySelector('section[data-mw-section-id="1"]>div')?.innerText.trim() ?? '';

		// Keep the whole title when it has no " (...)" suffix; cutting at index -1 would yield an empty string
		const suffixStart = pageTitle.indexOf(' (');
		backContent = (suffixStart === -1 ? pageTitle : pageTitle.substring(0, suffixStart)) + LINE_BREAK;
	}

	// Add table content
	if (!isAdverb && !isWortverbindung)
		backContent += getTableContent() + LINE_BREAK;

	backContent += '<br>[Your Example Here] <br><br>';

	// Add the heading sections that exist on the page (an empty string marks a missing one)
	for (const title of Object.values(titles))
		if (title !== '')
			backContent += getTitleContent(title);

	// Remove the last line break; cut exactly its own length so that no real content is lost
	if (backContent.endsWith(LINE_BREAK))
		backContent = backContent.slice(0, -LINE_BREAK.length).trim();

	copyToClipboard(backContent);
}
/**
 * Copies the given content to the clipboard.
 *
 * When copying is not possible (the Clipboard API is missing, e.g. in an insecure context,
 * or the write is rejected), the content is shown in a fixed div at the top left of the page instead.
 *
 * @param {string} content - HTML content to copy to the clipboard.
 *
 * @returns {void}
 */
function copyToClipboard(content)
{
	// Tells the user why copying failed and shows the content in a fallback div
	const showFallback = (reason) => {
		alert('Failed to copy content: ' + reason + '\nA fallback div is created instead.');

		const fallbackDiv = document.createElement('div');
		// NOTE(review): the content is built from page text and inserted as HTML so that it is shown
		// formatted; use textContent instead if the raw markup should be displayed.
		fallbackDiv.innerHTML = content;
		Object.assign(fallbackDiv.style, {
			position: 'fixed',
			top: '10px',
			left: '10px',
			width: '65%',
			background: 'white',
			zIndex: '500',
		});
		document.body.appendChild(fallbackDiv);
	};

	// Without this check a missing Clipboard API throws synchronously and the fallback is never reached
	if (typeof navigator.clipboard?.writeText !== 'function')
	{
		showFallback('Clipboard API is not available');
		return;
	}

	navigator.clipboard.writeText(content).then(() => {
		alert('Content copied to clipboard!');
	}, showFallback);
}
/**
 * Initializes the global state (word-class flags, main wrapper, inflection table,
 * heading paragraphs and translation) for the section the clicked button belongs to.
 *
 * When no supported word class is found in the section, all flags stay false,
 * `translation` stays null and nothing else is looked up.
 *
 * @param {Element} buttonElement - The button element that triggered the action.
 *
 * @returns {void}
 */
function initializeVariables(buttonElement)
{
	// Reset all variables first
	mainWrapper = null;
	isVerb = isNoun = isAdjective = isAdverb = isWortverbindung = false;
	theTable = null;
	for (const titleName of Object.keys(titles)) titles[titleName] = '';
	translation = null;

	const buttonParent = buttonElement.parentElement;

	const headingWrapper = (heading) => heading.parentElement.parentElement;
	// The noun selectors match a link inside the heading, which is one level deeper
	const linkWrapper = (link) => link.parentElement.parentElement.parentElement;
	const markNoun = () => { isNoun = true; };

	// Word classes in order of priority; the first selector that matches wins
	const wordClasses = [
		{ selector: 'h3#Verb', mark: () => { isVerb = true; }, wrapperOf: headingWrapper },
		{ selector: 'h3#Adjektiv', mark: () => { isAdjective = true; }, wrapperOf: headingWrapper },
		{ selector: 'h3#Adverb', mark: () => { isAdverb = true; }, wrapperOf: headingWrapper },
		{ selector: 'h3#Wortverbindung', mark: () => { isWortverbindung = true; }, wrapperOf: headingWrapper },
		// Special case for "Substantiv, n" because the ID has a comma in it: match the help link instead
		// (both the absolute and the protocol-relative form of the URL)
		{ selector: 'div.mw-heading.mw-heading3 a[href="https://de.wiktionary.org/wiki/Hilfe:Wortart#Substantiv"]', mark: markNoun, wrapperOf: linkWrapper },
		{ selector: 'div.mw-heading.mw-heading3 a[href="//de.wiktionary.org/wiki/Hilfe:Wortart#Substantiv"]', mark: markNoun, wrapperOf: linkWrapper },
	];

	let isSupported = false;
	for (const { selector, mark, wrapperOf } of wordClasses)
	{
		const match = buttonParent.querySelector(selector);
		if (match === null) continue;

		mark();
		mainWrapper = wrapperOf(match);
		isSupported = true;
		break;
	}
	if (!isSupported) return;

	theTable = buttonParent.querySelector('table.wikitable.inflection-table.float-right.flexbox');

	// Text of a heading paragraph -> key in `titles`
	const titleKeys = new Map([
		['Nicht mehr gültige Schreibweisen:', 'nichtmehrgultigeschreibweisen'],
		['Anmerkung:', 'anmerkung'],
		['Alternative Schreibweisen:', 'alternativeschreibweisen'],
		['Bedeutungen:', 'bedeutungen'],
		['Abkürzungen:', 'abkurzungen'],
		['Herkunft:', 'herkunft'],
		['Sinnverwandte Wörter:', 'sinnverwandteWorter'],
		['Synonyme:', 'synonyme'],
		['Gegenwörter:', 'gegenworter'],
		['Oberbegriffe:', 'oberbegriffe'],
		['Unterbegriffe:', 'unterbegriffe'],
		['Beispiele:', 'beispiele'],
		['Redewendungen:', 'redewendungen'],
		['Sprichwörter:', 'sprichworter'],
		['Charakteristische Wortkombinationen:', 'charakteristischeWortkombinationen'],
		['Wortbildungen:', 'wortbildungen'],
		['Entlehnungen:', 'entlehnungen'],
		['Ähnliche Wörter (Deutsch):', 'ahnlicheWorter'],
	]);

	// When the same heading text occurs more than once, the last paragraph wins
	mainWrapper.querySelectorAll('p').forEach((paragraph) => {
		const key = titleKeys.get(paragraph.innerText.trim());
		if (key !== undefined) titles[key] = paragraph;
	});

	// Not every section has a translations heading; use an empty translation instead of crashing
	const translationsHeading = buttonParent.querySelector('h4#Übersetzungen');
	translation = translationsHeading === null
		? ''
		: getTranslation(translationsHeading.parentElement.parentElement);
}
/**
 * Extracts the English translations from the translation wrapper.
 *
 * Every translation table that has an English entry contributes one part of the form
 * "[meaning] Word, Word" (verbs: "to Word"); the parts are joined with " - ".
 * For adjectives, adverbs and word connections a word-class marker is appended.
 *
 * @param {Element} translationWrapper - The wrapper element containing translations.
 *
 * @returns {string} - The formatted translation string (empty when there is no English translation).
 */
function getTranslation(translationWrapper)
{
	// Make the first letter uppercase
	const capitalize = (text) => text.charAt(0).toUpperCase() + text.slice(1);

	const parts = [];

	// Loop through all the translation tables
	translationWrapper.querySelectorAll('div.printNav').forEach((translationTable) => {
		// The language link exists with an absolute or a protocol-relative URL
		const englishLink = translationTable.querySelector('li a[href="https://de.wiktionary.org/wiki/Englisch"]')
			?? translationTable.querySelector('li a[href="//de.wiktionary.org/wiki/Englisch"]');
		if (englishLink === null) return; // Skip if there is no English translation

		// Only get the English translation
		const words = Array.from(englishLink.parentElement.querySelectorAll('span[lang="en"]'))
			.map((span) => span.innerText.trim())
			.filter((text) => text !== '')
			.map((text) => (isVerb ? 'to ' : '') + capitalize(text));
		if (words.length === 0) return;

		// A table without a meaning label still contributes its translations (just without the "[...]" prefix)
		const meaningLabel = translationTable.querySelector('div.NavHead span.bedeutung');
		const prefix = meaningLabel === null ? '' : '[' + meaningLabel.innerText.trim() + '] ';

		parts.push(prefix + words.join(', '));
	});

	let translationContent = parts.join(' - ');

	if (translationContent !== '')
	{
		if (isAdjective) translationContent += ' <i>(adj.)</i>';
		else if (isAdverb) translationContent += ' <i>(adv.)</i>';
		else if (isWortverbindung) translationContent += ' <i>(Wortverbindung)</i>';
	}

	return translationContent;
}
/**
 * Extracts the content of the inflection table (`theTable`) as a formatted string.
 *
 * - Nouns: only the first non-empty cell of the table is returned.
 * - Verbs: the cells with colspan="3" are collected per section ("<b>header:</b> a - b"),
 *   the row that links to "haben"/"sein" becomes a separate "Perfekt" section.
 * - Adjectives: all cells are collected per section.
 *
 * Sections are separated by " <br>". Line breaks inside a cell are turned into " / ".
 *
 * @returns {string} - The content of the table formatted as a string (empty when there is no table).
 */
function getTableContent()
{
	if (theTable == null) return '';

	let content = '';
	let rowContent = '';

	// Puts a multi-line cell on one line. Every line break is replaced (not just the first one)
	// and blank lines are dropped so that no dangling " /" is produced.
	const flattenCell = (cell) => cell.innerText
		.split('\n')
		.map((line) => line.trim())
		.filter((line) => line !== '')
		.join(' / ');

	// Moves the pending row content to the main content and starts a new labelled section
	const startSection = (label) => {
		// If there is already content in the row, add it to the main content
		if (rowContent !== '')
			content += rowContent.trim();
		rowContent = '';

		// If there is already content in the main content, add a line break
		if (content !== '') content += ' <br>';

		content += '<b>' + label + ':</b> ';
	};

	theTable.querySelectorAll('tr').forEach((row) => {
		if (row.querySelectorAll('td').length <= 0) return; // Skip header rows

		// For nouns, just the first cell (Nominativ Singular) is enough, the rest can be skipped
		if (isNoun && (content !== '' || rowContent !== '')) return;

		// If the row has a header cell (in non-noun words), treat it as a new section
		const headerCell = row.querySelector('th');
		if (!isNoun && headerCell !== null)
			startSection(headerCell.innerText.trim());

		if (isVerb)
		{
			row.querySelectorAll('td[colspan="3"]').forEach((cell) => {
				if (cell.innerText.trim() === '') return;

				const cellRow = cell.parentElement;
				const usesHaben = cellRow.querySelector('td a[title="haben"]') !== null;
				const usesSein = cellRow.querySelector('td a[title="sein"]') !== null;

				// The Perfekt row has a different format: it gets its own section plus the auxiliary verb
				if (usesHaben || usesSein)
				{
					startSection('Perfekt');
					content += flattenCell(cell);
					content += usesHaben ? ' haben' : ' <span style="color: rgb(255, 0, 0);"><b>sein</b></span>';
					return;
				}

				// For all other cells, continue with the normal approach
				if (rowContent !== '') rowContent += ' - ';

				// If the row is a singular or plural form, add the corresponding label
				const numberLabel = cellRow.querySelector('td small')?.innerText.trim();
				if (numberLabel === 'Singular' || numberLabel === 'Plural')
					rowContent += '<i>' + numberLabel + '</i>: ';

				rowContent += flattenCell(cell);
			});
		} else if (isNoun) {
			row.querySelectorAll('td').forEach((cell) => {
				if (cell.innerText.trim() === '') return;

				// For nouns, just the first cell (Nominativ Singular) is enough, the rest can be skipped
				if (content !== '') return;

				content += flattenCell(cell);
			});
		} else if (isAdjective) {
			row.querySelectorAll('td').forEach((cell) => {
				if (cell.innerText.trim() === '') return;

				if (rowContent !== '') rowContent += ' - ';
				rowContent += flattenCell(cell);
			});
		}
	});

	// If there is any content left in the row, add it to the main content
	if (rowContent !== '')
		content += rowContent.trim();

	return content.trim();
}
/**
 * Extracts the content of a title section as a formatted string.
 *
 * The result starts with the title in bold, followed by one line per entry:
 * an optional <ul> directly after the title, then every <dd> (and the <dl> elements inside it)
 * of the element that follows. Each line ends with " <br>".
 *
 * @param {Element} title - The title element to extract content from.
 *
 * @returns {string} - The formatted content of the title section.
 */
function getTitleContent(title)
{
	if (title == null) return '';

	let content = '<b>' + title.innerText.trim() + '</b> <br>';

	let nextSibling = title.nextElementSibling;
	if (nextSibling == null) return content; // No content after the title

	if (nextSibling.tagName.toLowerCase() === 'ul')
	{
		content += ' ' + keepItalicWords(nextSibling).trim() + ' <br>';
		nextSibling = nextSibling.nextElementSibling; // Move to the next sibling
	}

	// A <span> in front of the list is skipped
	if (nextSibling != null && nextSibling.tagName.toLowerCase() === 'span')
		nextSibling = nextSibling.nextElementSibling; // Move to the next sibling

	// The <ul> or <span> may have been the last element of the section
	if (nextSibling == null) return content;

	nextSibling.querySelectorAll('dd').forEach((dd) => {
		content += ' ' + removeCites(keepItalicWords(dd)).trim() + ' <br>';

		dd.querySelectorAll('dl').forEach((dl) => {
			content += ' ' + removeCites(keepItalicWords(dl)).trim() + ' <br>';
		});
	});

	return content;
}
/**
 * Returns the plain text (innerText) of the element with the words that are inside <i> tags
 * wrapped in <i>...</i> again; all other markup is dropped.
 *
 * Every whole-word occurrence of an italic word in the text is wrapped,
 * even if only one of the occurrences is italic in the element.
 *
 * @param {Element} element - The element to process.
 *
 * @returns {string} - The trimmed text of the element with the italic words marked up.
 */
function keepItalicWords(element)
{
	const italicWords = Array.from(element.querySelectorAll('i')).map((italic) => italic.innerText);

	let result = element.innerText;

	italicWords.forEach((word) => {
		// An empty <i> would produce a pattern that matches at every position
		if (word.trim() === '') return;

		// The word might contain parentheses or other special characters; escape them for the pattern only
		const escapedWord = word.replaceAll(/[.*+?^${}()|[\]\\]/g, '\\$&');

		// "\b" only knows ASCII word characters and fails next to umlauts and punctuation (e.g. "(ugs.)"),
		// so the word boundaries are checked with Unicode-aware lookarounds instead.
		// The last lookahead skips occurrences that are already wrapped.
		const pattern = new RegExp('(?<![\\p{L}\\p{N}_])' + escapedWord + '(?![\\p{L}\\p{N}_])(?!</i>)', 'gu');

		// A replacer function inserts the matched text as it is (no escape backslashes, no "$" patterns)
		result = result.replaceAll(pattern, (match) => '<i>' + match + '</i>');
	});

	return result.trim();
}
/**
 * Removes citation markers (e.g. "[3]") and "[Quellen fehlen]" notes from the content.
 *
 * A bracket at the very beginning of the content (e.g. the "[1]" in front of a line) is kept.
 *
 * @param {string} content - The text to clean up.
 *
 * @returns {string} - The trimmed content without citation markers.
 */
function removeCites(content)
{
	let beginningBracket = '';
	let rest = content;

	// Only remove citation markers from the middle or at the end of the content, don't remove the brackets that exist in the beginning of some lines
	const closingIndex = content.indexOf(']');
	if (content.startsWith('[') && closingIndex > 0)
	{
		beginningBracket = content.substring(0, closingIndex + 1).trim() + ' ';
		rest = content.substring(closingIndex + 1).trim();
	}

	// Every "[Quellen fehlen]" note is removed, not just the first one.
	// The final trim drops the separator space when nothing is left after the leading bracket.
	return (beginningBracket + rest.replace(/\[\d+\]/g, '').replaceAll('[Quellen fehlen]', '').trim()).trim();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment