# Mapping keyed by the ``tei`` prefix; presumably the namespace map handed to
# XPath queries elsewhere in this module -- TODO confirm.
# NOTE(review): the value is an empty string here; verify that is the intended
# namespace URI.
NS = {"tei": ""}
def tei_to_fulltext(tei):
    """Parse a TEI XML document and extract its full text.

    :param tei: the XML document, either as a text (unicode) string or as
        bytes. Text input is encoded to UTF-8 before parsing; any other
        input is handed to the parser unchanged.
    :returns: whatever ``get_fulltext`` returns for the parsed root element.

    The document is parsed with ``recover=True``, so lxml tries to carry on
    past malformed markup instead of raising on the first syntax error.
    """
    # Imported lazily so lxml/six are only required when this is called.
    from lxml import etree
    from six import text_type

    # The parser is configured with an explicit UTF-8 encoding, so feed it
    # bytes rather than an already-decoded text string.
    if isinstance(tei, text_type):
        tei = tei.encode('utf-8')

    xml_parser = etree.XMLParser(encoding='UTF-8', recover=True)
    return get_fulltext(etree.fromstring(tei, xml_parser))
def get_fulltext(root):
"""TODO: move it to invenio_grobid`"""
return ' '.join(root.xpath(
