# napoler/markdown_to_text.py

## markdown_to_text.py
import re

from bs4 import BeautifulSoup
from markdown import markdown

def markdown_to_text(markdown_string: str) -> str:
    """Convert a Markdown string to plain text, dropping code snippets.

    The Markdown is rendered to HTML, every ``<pre>`` and ``<code>`` element
    is replaced by a single space, and the remaining text nodes are joined
    into one string.

    Args:
        markdown_string: Markdown source to convert.

    Returns:
        The text content of the rendered document without code snippets.
    """
    # md -> html -> text since BeautifulSoup can extract text cleanly
    html = markdown(markdown_string)

    # Remove code snippets. DOTALL lets ``.`` span the newlines inside
    # multi-line blocks; ``[^>]*`` tolerates attributes on the opening tag.
    # <pre> goes first so a wrapping <pre><code>...</code></pre> is removed
    # as a single unit.
    html = re.sub(r'<pre\b[^>]*>.*?</pre>', ' ', html, flags=re.DOTALL)
    html = re.sub(r'<code\b[^>]*>.*?</code>', ' ', html, flags=re.DOTALL)

    # Extract and concatenate every remaining text node.
    soup = BeautifulSoup(html, "html.parser")
    return ''.join(soup.find_all(string=True))
	from bs4 import BeautifulSoup
	from markdown import markdown

	def markdown_to_text(markdown_string: str) -> str:
		"""Convert a Markdown string to plain text, dropping code snippets.

		The Markdown is rendered to HTML, every ``<pre>`` and ``<code>``
		element is replaced by a single space, and the remaining text nodes
		are joined into one string.

		Args:
			markdown_string: Markdown source to convert.

		Returns:
			The text content of the rendered document without code snippets.
		"""
		# md -> html -> text since BeautifulSoup can extract text cleanly
		html = markdown(markdown_string)

		# Remove code snippets. DOTALL lets ``.`` span the newlines inside
		# multi-line blocks; ``[^>]*`` tolerates attributes on the opening tag.
		# <pre> goes first so a wrapping <pre><code>...</code></pre> is removed
		# as a single unit.
		html = re.sub(r'<pre\b[^>]*>.*?</pre>', ' ', html, flags=re.DOTALL)
		html = re.sub(r'<code\b[^>]*>.*?</code>', ' ', html, flags=re.DOTALL)

		# Extract and concatenate every remaining text node.
		soup = BeautifulSoup(html, "html.parser")
		return ''.join(soup.find_all(string=True))