Created
July 15, 2015 00:38
-
-
Save waylan/f2489900202f530edc68 to your computer and use it in GitHub Desktop.
BeautifulSoup Document without parser - Currently broken!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4.element import Tag, NavigableString | |
class Doc(Tag):
    """
    Dummy document root.

    This class provides a document root object without a parser.
    The document needs to be built manually by adding Tags,
    NavigableStrings, Comments and the like. This class assumes
    HTML (not XML) and offers no options.

    By default it serializes to a string using the "html" formatter
    and "pretty_print". It only outputs Unicode strings.
    """

    def __init__(self):
        # Let Tag.__init__ set up the per-node state that the inherited
        # Tag methods read (name, attrs, contents, navigation pointers, ...)
        # instead of hand-copying a subset of those attributes here.
        # Anything left unset would otherwise fall through to
        # Tag.__getattr__, which treats unknown names as child-tag lookups.
        super(Doc, self).__init__(name="document")
        # Document-specific overrides. These come after the base
        # initialiser so that it cannot reset them.
        self.hidden = True  # serialize only the children, not a <document> tag
        self.is_xml = False
        self.builder = None
        self.parser_class = None

    def new_tag(self, name, namespace=None, nsprefix=None, attrs=None,
                **kwattrs):
        """
        Create a new tag associated with this soup.

        HTML attributes may be given as a dict through ``attrs`` (needed
        for names such as ``class`` that are not valid Python keywords),
        as keyword arguments, or both. When the same attribute appears in
        both, the value from ``attrs`` wins.
        """
        merged = dict(kwattrs)
        if attrs:
            merged.update(attrs)
        return Tag(None, self.builder, name, namespace, nsprefix, merged)

    def new_string(self, s, subclass=NavigableString):
        """Create a new NavigableString associated with this soup."""
        return subclass(s)

    def insert_before(self, successor):
        """Always raise: this object is used as the root of the document."""
        raise NotImplementedError("Doc objects don't support insert_before().")

    def insert_after(self, successor):
        """Always raise: this object is used as the root of the document."""
        raise NotImplementedError("Doc objects don't support insert_after().")

    def decode(self, pretty_print=True, formatter="html"):
        """
        Return a Unicode string of this document.

        ``pretty_print`` selects indented output (starting at indent
        level 0) versus no indentation; ``formatter`` is passed through
        to ``Tag.decode``.
        """
        # Tag.decode takes an indent level rather than a boolean flag;
        # None disables pretty-printing.
        indent_level = 0 if pretty_print else None
        return super(Doc, self).decode(indent_level, None, formatter)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from unittest import TestCase | |
import soup | |
class TestSoup(TestCase):
    """Tests for the parser-less ``soup.Doc`` document root."""

    def test_doc(self):
        # A document with no children should serialize to an empty string.
        empty_doc = soup.Doc()
        self.assertEqual(empty_doc.decode(), '')

    def test_append(self):
        # Build <p class="foo">Some paragraph text</p> by hand and attach
        # it to the document root.
        doc = soup.Doc()
        paragraph = doc.new_tag('p', attrs={'class':'foo'})
        paragraph.append(doc.new_string('Some paragraph text'))
        doc.append(paragraph)

        # Appending must register the document as the paragraph's parent.
        self.assertEqual(paragraph.parent, doc)

        # NOTE(review): prettify() requests pretty-printed output, so the
        # single-line string below may not match what it returns - confirm
        # the intended expectation against the bs4 version in use.
        expected = '<p class="foo">Some paragraph text</p>'
        self.assertEqual(doc.prettify(), expected)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment