# Extract the plain text from Markdown, for plain-text search.
import commonmark
# Load the Markdown source and parse it into a commonmark AST.
# NOTE(review): the path below is an empty placeholder -- open('') raises
# FileNotFoundError. Fill in the real Markdown file path before running.
with open('', 'r') as myfile:
    # Read the whole document; it is handed to the parser as one string.
    text = myfile.read()
parser = commonmark.Parser()
ast = parser.parse(text)
def ast2text(astNode, *, debug=False):
    """Return the text of a parsed Markdown tree, stripped of Markdown syntax.

    Walks the tree rooted at ``astNode`` and concatenates the ``literal`` of
    every visited node that has one. Line breaks that the literals do not
    carry themselves are re-inserted so that adjacent words and blocks do
    not run together in the result.

    Args:
        astNode: Root node exposing ``walker()``, whose result yields
            ``(node, entering)`` pairs. Each node must provide a ``t``
            (node type name) and a ``literal`` attribute.
        debug: Keyword-only. When true, print a trace of every visited
            node to stdout. Off by default so that extracting text does
            not write to stdout.

    Returns:
        The accumulated text with leading and trailing whitespace removed.
    """
    # Collect fragments and join once instead of growing a string in a loop.
    parts = []

    for current, entering in astNode.walker():
        if debug:
            print("- - - - - - -")
            print(f"{entering} \"{current.t}\": \"{current.literal}\"")

        # Add the text
        if current.literal:
            parts.append(current.literal)

        # Add in the missing line breaks. Break nodes carry no literal, so
        # without this the words on either side would be glued together.
        if current.t in ("linebreak", "softbreak"):
            parts.append("\n")
        elif not entering:
            # Block separators are emitted when a block is left, i.e. after
            # all of its inline content has been appended.
            if current.t == "paragraph":
                parts.append("\n\n")
            elif current.t == "heading":
                parts.append("\n")

    return "".join(parts).strip()
