Skip to content

Instantly share code, notes, and snippets.

@waylan
Created June 19, 2020 18:31
Show Gist options
  • Save waylan/ba297e1c46dc5a16cac6347387bc1452 to your computer and use it in GitHub Desktop.
Save waylan/ba297e1c46dc5a16cac6347387bc1452 to your computer and use it in GitHub Desktop.
Markdown to Markdown using Mistune (experimental and incomplete)
from mistune import create_markdown
from mistune.renderers import BaseRenderer
import re
# Matches one Markdown special character (any of \ ` * _ ( ) [ ] # + -) that
# is not already preceded by a backslash; group 1 captures the character.
ESCAPE_CHAR = re.compile(r'(?<!\\)([\\`*_()\[\]#+-])')
# Matches a `*` at the start of a line (re.MULTILINE) followed by one or more
# spaces; group 1 is the bullet, group 2 the spaces after it.
UL_BULLET = re.compile(r'(?<=^)(\*)( +)', re.MULTILINE)
def indent(text, level, tab_length=4):
    '''
    Prefix every line of `text` with `level` indentation steps.

    One step is `tab_length` spaces. Every line produced by splitting on
    newline is prefixed, including empty ones.
    '''
    padding = ' ' * tab_length * level
    indented_lines = [padding + row for row in text.split('\n')]
    return '\n'.join(indented_lines)
class MdRenderer(BaseRenderer):
    '''
    Mistune renderer that emits Markdown instead of HTML.

    Every method takes the text/attributes of one element and returns the
    Markdown source for it. Block-level elements end with a blank line.
    Experimental and incomplete.
    '''
    NAME = 'md'
    IS_TREE = False

    @staticmethod
    def _backtick_fence(text, minimum):
        ''' Return a backtick fence longer than any backtick run in `text`. '''
        longest = max((len(run) for run in re.findall(r'`+', text)), default=0)
        return '`' * max(minimum, longest + 1)

    def text(self, text):
        ''' Backslash-escape Markdown special characters in plain text. '''
        # TODO: escaping is probably more aggressive than it needs to be.
        return ESCAPE_CHAR.sub(r'\\\1', text)

    def link(self, link, text=None, title=None):
        '''
        Render a link, using the `<...>` autolink form when the text is the
        URL itself or the address of a `mailto:` link.

        `text` falls back to `link` when it is None; `title` is omitted when
        it is None.
        '''
        # `text` may be None (its default), so guard the membership test.
        is_email_autolink = (
            text is not None
            and '@' in text
            and link.startswith('mailto:')
            and link[7:] == text
        )
        if link == text or is_email_autolink:
            # Autolink
            return f'<{text}>'
        text = link if text is None else text
        title = f' "{title}"' if title is not None else ''
        return f'[{text}]({link}{title})'

    def image(self, src, alt="", title=None):
        ''' Render an inline image; `title` is omitted when it is None. '''
        title = f' "{title}"' if title is not None else ''
        return f'![{alt}]({src}{title})'

    def emphasis(self, text):
        ''' Wrap `text` in single asterisks. '''
        return f'*{text}*'

    def strong(self, text):
        ''' Wrap `text` in double asterisks. '''
        return f'**{text}**'

    def codespan(self, text):
        '''
        Render an inline code span.

        When the text contains backticks, the delimiter is made one backtick
        longer than the longest run inside the text and padded with a space
        on each side, so the content cannot close the span early.
        '''
        if '`' in text:
            fence = self._backtick_fence(text, 2)
            return f'{fence} {text} {fence}'
        return f'`{text}`'

    def linebreak(self):
        ''' Render a hard line break. '''
        # A hard break needs at least two trailing spaces before the newline.
        return '  \n'

    def inline_html(self, html):
        ''' Pass inline HTML through unchanged. '''
        return html

    def paragraph(self, text):
        ''' Render a paragraph followed by a blank line. '''
        return f'{text}\n\n'

    def heading(self, text, level):
        ''' Render an ATX heading with `level` leading `#` characters. '''
        return f'{"#"*level} {text}\n\n'

    def newline(self):
        ''' Render a single newline. '''
        return '\n'

    def thematic_break(self):
        ''' Render a thematic break (horizontal rule). '''
        return '- - -\n\n'

    def block_text(self, text):
        ''' Return block text unchanged. '''
        return text

    def block_code(self, code, info=None):
        '''
        Render a fenced code block with optional `info` string.

        Trailing newlines are stripped from `code`. The fence is at least
        three backticks and always longer than any backtick run in the code,
        so the content cannot terminate the block.
        '''
        info = info or ''
        code = code.rstrip('\n')
        fence = self._backtick_fence(code, 3)
        return f'{fence}{info}\n{code}\n{fence}\n\n'

    def block_quote(self, text):
        ''' Prefix each line of the stripped text with `> `. '''
        return '\n'.join([f'> {line}' for line in text.strip().splitlines()]) + '\n'

    def block_html(self, html):
        ''' Pass block HTML through, followed by a blank line. '''
        return f'{html}\n\n'

    def block_error(self, html):
        ''' Wrap an error message in a `<div class="error">`. '''
        # TODO: this is non-standard. Maybe ignore?
        return f'<div class="error">{html}</div>\n\n'

    def list(self, text, ordered, level, start=None):
        '''
        Finish a list whose items were rendered by `list_item`.

        For ordered lists the `*` bullets at the start of a line are replaced
        by incrementing numbers beginning at `start` (1 when None). Only
        bullets in column 0 are touched; `list_item` indents continuation
        lines, so bullets of nested lists are left alone.
        Top-level lists (`level == 1`) lose their leading newlines and get a
        trailing blank line.
        '''
        if ordered:
            # assumes `start` is an int (or numeric string) -- TODO confirm
            next_number = 1 if start is None else int(start)

            def numbered(match):
                nonlocal next_number
                marker = f'{next_number}. '
                next_number += 1
                # Pad to four columns to line up with indented continuations.
                return f'{marker:<4}'

            text = UL_BULLET.sub(numbered, text)
        if level == 1:
            # Adjust blank lines for level 1 lists
            # TODO: fix some edge cases with nested lists
            text = text.lstrip('\n')
            text += '\n\n'
        return text

    def list_item(self, text, level):
        '''
        Render one list item as a `*` bullet.

        Lines after the first are indented one level. Ordered lists are
        renumbered afterwards by `list`.
        '''
        if '\n' in text:
            # Indent all lines after the first line.
            firstline, therest = text.split('\n', 1)
            text = '\n'.join([firstline, indent(therest, 1)])
        # The linebreak goes at the front for nested items
        return f'\n* {text}'
# Markdown-to-Markdown converter: a Mistune parser (escape=False) that
# renders through MdRenderer.
md2md = create_markdown(escape=False, renderer=MdRenderer())

if __name__ == '__main__':
    # Command line use: convert FILE (or stdin when the argument is `-`)
    # and print the resulting Markdown to stdout.
    import sys

    if len(sys.argv) < 2:
        # sys.exit() with a string prints it to stderr and exits with a
        # non-zero status, so a usage error is not mistaken for success and
        # does not end up in piped output.
        sys.exit(f'usage: {sys.argv[0]} FILE|-')
    if sys.argv[1] == '-':
        src = sys.stdin.read()
    else:
        with open(sys.argv[1]) as f:
            src = f.read()
    print(md2md(src))
@waylan
Copy link
Author

waylan commented Jun 19, 2020

lepture/mistune#208 may be relevant to this.

Also, note that this uses f-strings (see PEP 498), which are only supported in Python 3.6+.

@skyzh
Copy link

skyzh commented Jun 23, 2020

Hi @waylan! In SJTU-CSE/awesome-cs#31, I used your script to automatically validate whether the items in Markdown lists are sorted in dictionary order, and to pretty-print the validator's results. Thank you for this awesome work!

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment