Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Markdown to Plaintext in Python
import re

from bs4 import BeautifulSoup
from markdown import markdown
def markdown_to_text(markdown_string: str) -> str:
    """Convert a Markdown string to plain text.

    The Markdown is rendered to HTML, code snippets are removed from the
    HTML, and the remaining text nodes are concatenated.

    Args:
        markdown_string: Markdown source text.

    Returns:
        The concatenated text of the rendered document, with each
        ``<pre>...</pre>`` and ``<code>...</code>`` span replaced by a
        single space.
    """
    # md -> html -> text since BeautifulSoup can extract text cleanly
    html = markdown(markdown_string)

    # Remove code snippets. re.DOTALL lets "." match newlines so snippets
    # spanning several lines are removed too. <pre> is handled first so any
    # <code> nested inside it disappears together with its wrapper.
    html = re.sub(r'<pre>(.*?)</pre>', ' ', html, flags=re.DOTALL)
    html = re.sub(r'<code>(.*?)</code>', ' ', html, flags=re.DOTALL)

    # Extract text: join every remaining string node in document order.
    soup = BeautifulSoup(html, "html.parser")
    return ''.join(soup.find_all(string=True))
@hemikak

This comment has been minimized.

Copy link

hemikak commented Aug 19, 2018

 html = re.sub(r'<code>(.*?)</code>', ' ', html)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.