Created
July 13, 2025 18:05
-
-
Save ddanielgal/af66211a7793198a4457244d3ea50aa8 to your computer and use it in GitHub Desktop.
Pandoc script to fix links in articles downloaded with MarkDownload
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Indices (positions in doc.blocks) of paragraphs recognised as leftovers of
-- a link that was split across several paragraphs.
-- Filled by identify_broken_links.
local paragraphs_to_remove = {}

--- First pass: record which top-level paragraphs are broken-link debris.
-- A paragraph is recorded when its stringified text is a lone "[" or starts
-- with "](" (leading whitespace is allowed in both cases).
-- @param doc document whose `blocks` list is scanned; it is not modified
-- @return the same `doc` value that was passed in
function identify_broken_links(doc)
  -- Recorded indices only make sense for the document being scanned, so
  -- discard whatever a previous call left behind (the table itself is kept).
  for index in pairs(paragraphs_to_remove) do
    paragraphs_to_remove[index] = nil
  end

  for i, block in ipairs(doc.blocks) do
    if block.t == "Para" then
      local text = pandoc.utils.stringify(block)
      -- "^%s*%[%s*$": nothing but a single "[";  "^%s*%]%(": begins with "](".
      if text:match("^%s*%[%s*$") or text:match("^%s*%]%(") then
        paragraphs_to_remove[i] = true
      end
    end
  end
  return doc
end
-- Lua pattern for a wikilink-style image embed, capturing its target:
-- ![[target]]
local WIKILINK_IMAGE_PATTERN = "!%[%[(.-)%]%]"

--- Append `inline` (a Str) to `out`, converting every ![[target]] it contains.
-- Text before, between and after embeds is kept as separate Str elements;
-- each embed becomes an Image with an empty caption and `target` as source.
-- A Str without any embed is appended unchanged.
-- @param out    list of inlines being built (appended to in place)
-- @param inline Str element to convert
local function append_wikilink_converted(out, inline)
  local text = inline.text
  local pos = 1
  local converted = false
  while true do
    local first, last, filename = text:find(WIKILINK_IMAGE_PATTERN, pos)
    if not first then
      break
    end
    if first > pos then
      out[#out + 1] = pandoc.Str(text:sub(pos, first - 1))
    end
    out[#out + 1] = pandoc.Image({}, filename)
    pos = last + 1
    converted = true
  end

  if not converted then
    out[#out + 1] = inline
  elseif pos <= #text then
    -- Trailing text after the last embed (e.g. punctuation).
    out[#out + 1] = pandoc.Str(text:sub(pos))
  end
end

--- Second pass: drop broken-link paragraphs and convert wikilink images.
-- @param el Para element
-- @return an empty list when the paragraph is only a lone "[" or starts with
--         "](" (the split halves of a broken link); otherwise a new Para in
--         which ![[target]] occurrences inside Str elements are replaced by
--         Image elements and all other inlines are carried over as they are
function Para(el)
  local text = pandoc.utils.stringify(el)
  if text:match("^%s*%[%s*$") or text:match("^%s*%]%(") then
    return {}
  end

  local new_inlines = {}
  for _, inline in ipairs(el.content) do
    if inline.t == "Str" then
      append_wikilink_converted(new_inlines, inline)
    else
      new_inlines[#new_inlines + 1] = inline
    end
  end
  return pandoc.Para(new_inlines)
end
--- Document-level filter: drop top-level paragraphs that are broken-link debris.
-- A Para is dropped when its stringified text is a lone "[" or starts with
-- "](" (leading whitespace allowed). Every other block is kept in its
-- original order.
-- @param doc document to filter
-- @return a new document built from the kept blocks and `doc.meta`
function Pandoc(doc)
  local kept = {}
  for _, block in ipairs(doc.blocks) do
    local is_debris = false
    if block.t == "Para" then
      local text = pandoc.utils.stringify(block)
      is_debris = text:match("^%s*%[%s*$") ~= nil
        or text:match("^%s*%]%(") ~= nil
    end
    if not is_debris then
      kept[#kept + 1] = block
    end
  end
  return pandoc.Pandoc(kept, doc.meta)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment