Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Extract the plain text from markdown, for plain text search.
import commonmark
# Load the markdown source to be converted.
# NOTE(review): the path is an empty placeholder in this snippet; point it at
# the real markdown file before running.
with open('', 'r', encoding='utf-8') as myfile:
    # Read the whole file; the parser below takes the full document as a string.
    text = myfile.read()

# Parse the markdown into a commonmark syntax tree.
parser = commonmark.Parser()
ast = parser.parse(text)
def ast2text(astNode, *, debug=False):
    """Return the text from markdown, stripped of the markdown syntax itself.

    Walks every node reachable from ``astNode`` and concatenates the
    ``literal`` of each node that has one. Line breaks that the literals do
    not carry are re-inserted: a newline for each hard or soft line break and
    after each heading, and a blank line after each paragraph.

    Args:
        astNode: Root node to extract from. It must provide ``walker()``,
            returning an iterable of ``(node, entering)`` pairs whose nodes
            expose ``t`` (the node type name) and ``literal``.
        debug: When true, print a trace line for every visited node.

    Returns:
        The accumulated text with leading and trailing whitespace stripped.
    """
    parts = []
    for current, entering in astNode.walker():
        if debug:
            print("- - - - - - -")
            print(f"{entering} \"{current.t}\": \"{current.literal}\"")

        # Add the text
        if current.literal:
            parts.append(current.literal)

        # Add in the missing line breaks
        if current.t in ("linebreak", "softbreak"):
            # Soft breaks have no literal; without this, the words on either
            # side of a wrapped source line would be fused together.
            parts.append("\n")
        elif current.t == "paragraph" and not entering:
            parts.append("\n\n")
        elif current.t == "heading" and not entering:
            parts.append("\n")

    return "".join(parts).strip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.