Skip to content

Instantly share code, notes, and snippets.

@waylan
Created July 15, 2015 00:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save waylan/f2489900202f530edc68 to your computer and use it in GitHub Desktop.
Save waylan/f2489900202f530edc68 to your computer and use it in GitHub Desktop.
BeautifulSoup Document without parser - Currently broken!
from bs4.element import Tag, NavigableString
class Doc(Tag):
    """
    Dummy document root.

    This class provides a document root object without a parser.
    The document has to be built manually by adding Tags,
    NavigableStrings, Comments and the like. This class assumes
    HTML (not XML) and offers no options.

    By default it serializes to a string using the "html" formatter
    and "pretty_print". It only outputs Unicode strings.
    """

    def __init__(self):
        """Create an empty, parser-less document root."""
        # Run the base initializer instead of assigning fields by hand.
        # Besides name/namespace/prefix/attrs/contents/parser_class it also
        # sets up the tree links (parent, siblings, next/previous element).
        # If those are left unset, looking them up falls through to
        # Tag.__getattr__, which interprets an unknown attribute name as a
        # search for a child tag of that name.
        super(Doc, self).__init__(parser=None, builder=None, name="document")
        # The base initializer resets ``hidden`` to False, so flag the root
        # as hidden afterwards; an empty Doc then serializes to ''.
        self.hidden = True
        self.is_xml = False
        # There is no tree builder; new_tag() passes this on to each Tag.
        self.builder = None

    def new_tag(self, name, namespace=None, nsprefix=None, attrs=None,
                **kwattrs):
        """
        Create a new tag associated with this soup.

        ``attrs`` is an optional dict of HTML attributes. Use it for names
        that are not valid Python keyword arguments, such as ``class``.
        Attributes may also be given as keyword arguments; on a name clash
        the value from ``attrs`` wins.

        The returned Tag is not attached to the document; add it with
        ``append()`` or ``insert()``.
        """
        # Copy so neither the caller's dict nor kwattrs is shared with the tag.
        merged = dict(kwattrs)
        if attrs:
            merged.update(attrs)
        return Tag(None, self.builder, name, namespace, nsprefix, merged)

    def new_string(self, s, subclass=NavigableString):
        """
        Create a new NavigableString associated with this soup.

        ``subclass`` selects the string class to instantiate (for example
        a Comment class); it is called with ``s`` as its only argument.
        """
        return subclass(s)

    def insert_before(self, successor):
        """Always raise NotImplementedError; not supported on a Doc."""
        raise NotImplementedError("Doc objects don't support insert_before().")

    def insert_after(self, successor):
        """Always raise NotImplementedError; not supported on a Doc."""
        raise NotImplementedError("Doc objects don't support insert_after().")

    def decode(self, pretty_print=True, formatter="html"):
        """
        Return a Unicode string of this document.

        ``pretty_print`` selects indented output when true and compact
        output when false. ``formatter`` is handed to ``Tag.decode``
        unchanged.
        """
        # Tag.decode() takes an indent level, not a flag: 0 starts
        # pretty-printing at the left margin, None turns it off.
        indent_level = 0 if pretty_print else None
        # The second positional argument is the eventual encoding; None is
        # passed because the result stays a Unicode string.
        return super(Doc, self).decode(indent_level, None, formatter)
from unittest import TestCase
import soup
class TestSoup(TestCase):
    """Tests for ``soup.Doc``, the parser-less document root."""

    def test_doc(self):
        """An empty document serializes to an empty string."""
        doc = soup.Doc()
        self.assertEqual(doc.decode(), '')

    def test_append(self):
        """An appended tag is parented by the document and serialized."""
        doc = soup.Doc()
        # 'class' is a reserved word, so it is supplied by dict unpacking.
        # Passing ``attrs={...}`` to a ``**attrs`` signature would instead
        # create an HTML attribute literally named "attrs".
        p = doc.new_tag('p', **{'class': 'foo'})
        text = doc.new_string('Some paragraph text')
        p.append(text)
        doc.append(p)
        # Identity, not structural equality: the parent must be this very doc.
        self.assertIs(p.parent, doc)
        # Compare the compact form. Pretty-printed output places the text on
        # its own indented line, so it can never equal this one-line string.
        self.assertEqual(
            doc.decode(pretty_print=False),
            '<p class="foo">Some paragraph text</p>'
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment