Skip to content

Instantly share code, notes, and snippets.

@waylan
Last active August 29, 2015 14:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save waylan/894713cc2369f8a6d46c to your computer and use it in GitHub Desktop.
Save waylan/894713cc2369f8a6d46c to your computer and use it in GitHub Desktop.
HTMLTree -- An HTML Node Tree toolkit. Warning! This is unfinished.
# -*- coding: utf-8 -*-
#
# HTMLTree
#
# An HTML Node Tree toolkit.
#
# --------------------------------------------------------------------
#
# Copyright (c) 2015 by Waylan Limberg. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of HTMLTree nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY WAYLAN LIMBERG ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO HTMLTree
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# --------------------------------------------------------------------
#
# Parts borrowed from ElementTree.
#
# The ElementTree toolkit is
#
# Copyright (c) 1999-2007 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
from __future__ import unicode_literals
import sys
# Public API of the module: the names exported by a star-import.
__all__ = [
    'Element',
    'Comment',
    'Text',
    'AtomicText',
    'RawText',
    'is_node',
    'is_element',
    'is_text',
    'is_atomic_text',
    'is_raw_text',
    'is_comment',
    'to_string',
]
# Alias for the interpreter's unicode string type: `str` when the major
# version is 3, the Python 2 `unicode` builtin otherwise.
text_type = str if sys.version_info[0] == 3 else unicode  # noqa  # pragma: no cover
# --------------------------------------------------------------------
# Helpers
# Tag names of "empty" HTML elements. Elements with these tags refuse
# children and are serialized without an end tag.
# NOTE: every name needs its own comma -- adjacent string literals are
# silently concatenated ('meta' 'param' used to become 'metaparam').
HTML_EMPTY = set([
    'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
    'img', 'input', 'isindex', 'link', 'meta', 'param'
])
# Tag names treated as block-level by the serializer when it decides
# where to emit newlines.
HTML_BLOCK = {
    # text containers and headings
    'p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'pre',
    # tables and lists
    'table', 'dl', 'ol', 'ul', 'li', 'dt', 'dd',
    'thead', 'tbody', 'tr', 'th', 'td',
    # scripting, forms and embedded content
    'script', 'noscript', 'form', 'fieldset', 'iframe', 'math', 'hr',
    'style', 'canvas', 'output', 'progress', 'video',
    # sectioning
    'section', 'footer', 'header', 'group', 'figure', 'figcaption',
    'aside', 'article', 'nav',
}
def is_node(node):
    """
    Return True if the object is a node, i.e. an instance of `Node` or of
    one of its subclasses.
    """
    verdict = isinstance(node, Node)
    return verdict
def is_element(node):
    """
    Return True if the object is an `Element` node (subclasses included).
    """
    verdict = isinstance(node, Element)
    return verdict
def is_text(node, strict=False):
    """
    Return True if the object is a `Text` node.

    By default instances of `Text` subclasses are accepted as well. With
    `strict` set to True only objects whose class is exactly `Text` pass.
    """
    if not isinstance(node, Text):
        return False
    if not strict:
        return True
    return node.__class__ is Text
def is_atomic_text(node):
    """
    Return True if the object is an `AtomicText` node (subclasses included).
    """
    verdict = isinstance(node, AtomicText)
    return verdict
def is_raw_text(node):
    """
    Return True if the object is a `RawText` node (subclasses included).
    """
    verdict = isinstance(node, RawText)
    return verdict
def is_comment(node):
    """
    Return True if the object is a `Comment` node (subclasses included).
    """
    verdict = isinstance(node, Comment)
    return verdict
# --------------------------------------------------------------------
# Nodes
class Node(object):
    """
    Common ancestor of every node type.

    All nodes inherit from this class. It is not meant to be used
    directly; instantiate one of the subclasses instead.
    """

    # The node this node is attached to; `None` by default.
    parent = None

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s node at %#x>' % (class_name, id(self))
class Comment(Node, text_type):
    """
    Comment node.

    A unicode string (it subclasses the text type) holding the text of an
    HTML comment. The serializer writes it as `<!-- text -->` with the
    text passed through the character-data escaping.
    """
class Text(Node, text_type):
    """
    Text node.

    A unicode string (it subclasses the text type) holding the content of
    a text node. The serializer escapes `&`, `<` and `>` when writing it.
    """
class AtomicText(Text):
    """
    AtomicText node.

    Contains the text of an atomic text node.

    Atomic text nodes are treated by the serializer as text nodes (they
    are escaped like any other `Text`). However, they are meant to be
    ignored/skipped by transformers.
    """
class RawText(Text):
    """
    RawText node.

    Contains the text of a raw text node, which the serializer writes out
    verbatim. Be warned that no escaping of any kind is done to raw text.
    """
class Element(Node):
    """
    An HTML Element Node.

    An element's length is the number of children (including text nodes).
    The element tag, and attributes must be unicode strings.

    When a child is added, that child's `parent` attribute is assigned
    as a reference to the parent instance. When a child is removed,
    the child's `parent` attribute is set to `None`.

    `tag` is the element name. All additional keyword arguments are element
    attributes. If tag is `None`, only its children will be serialized.

    All text is contained in child Text or RawText nodes. The content of
    RawText nodes will not be escaped when serialized. Therefore, use RawText
    nodes to hold the content of "script" and "style" elements.

    Text and RawText nodes cannot contain any children. Neither can any
    Element nodes with tag names listed in HTML_EMPTY.
    """

    # The element's name.
    tag = None
    # Dictionary of the element's attributes.
    attrib = None

    def __init__(self, tag=None, **attrib):
        self.tag = tag
        self.attrib = attrib
        self._children = []

    def copy(self):
        """
        Return a shallow copy of current element.

        The attribute dictionary is copied. Subelements are shared with
        the original tree and keep the original element as their `parent`.
        """
        node = self.__class__(self.tag, **self.attrib)
        # Fill the child list directly instead of going through
        # `__setitem__`: that would steal the children's `parent` reference
        # from the original and raise for "empty" elements such as `br`.
        node._children = list(self._children)
        return node

    def __len__(self):
        return len(self._children)

    def __getitem__(self, index):
        return self._children[index]

    def __setitem__(self, index, node):
        """
        Replace the child (or slice of children) at `index`.

        Raises TypeError if this element cannot hold children or if a new
        child is not a node, and IndexError for an out-of-range index.
        `parent` references are only updated once the assignment succeeded.
        """
        self._assert_can_contain_children()
        if isinstance(index, slice):
            # Materialize first: `node` may be a one-shot iterator.
            incoming = list(node)
            for n in incoming:
                self._assert_is_node(n)
            outgoing = self._children[index]
            self._children[index] = incoming
        else:
            self._assert_is_node(node)
            incoming = [node]
            outgoing = [self._children[index]]
            self._children[index] = node
        # Detach the replaced children before attaching the new ones so a
        # node present in both groups ends up attached.
        for old in outgoing:
            old.parent = None
        for new in incoming:
            new.parent = self

    def __delitem__(self, index):
        """
        Delete the child (or slice of children) at `index` and detach it.
        """
        removed = self._children[index]
        del self._children[index]
        if not isinstance(index, slice):
            removed = [removed]
        for child in removed:
            child.parent = None

    def _assert_is_node(self, node):
        if not is_node(node):
            raise TypeError('expected a Node, not {0}'.format(type(node).__name__))

    def _assert_can_contain_children(self):
        if self.tag is not None and self.tag.lower() in HTML_EMPTY:
            raise TypeError(
                '{0} is an "empty" HTML element and cannot accept any children'.format(repr(self))
            )

    def append(self, node):
        """
        Add child node to the end of this node's children.

        Raises TypeError if this element cannot hold children or if `node`
        is not a node.
        """
        self._assert_can_contain_children()
        self._assert_is_node(node)
        node.parent = self
        self._children.append(node)

    def extend(self, nodes):
        """
        Append child nodes from a sequence to end of this node's children.

        Any iterable is accepted. Nothing is modified unless every item is
        a node; otherwise TypeError is raised.
        """
        self._assert_can_contain_children()
        # Materialize once so iterators are not exhausted by validation.
        nodes = list(nodes)
        for node in nodes:
            self._assert_is_node(node)
        for node in nodes:
            node.parent = self
        self._children.extend(nodes)

    def insert(self, index, node):
        """
        Insert child node at index.

        Raises TypeError if this element cannot hold children or if `node`
        is not a node.
        """
        self._assert_can_contain_children()
        self._assert_is_node(node)
        node.parent = self
        self._children.insert(index, node)

    def remove(self, node):
        """
        Remove matching child node.

        The child that is the very same object as `node` is preferred; if
        there is none, the first child comparing equal is removed. The
        removed child's `parent` is set to `None`.

        ValueError is raised if a matching node could not be found; in that
        case nothing is modified.
        """
        for index, child in enumerate(self._children):
            if child is node:
                break
        else:
            # Text nodes compare by string value, so fall back to equality.
            index = self._children.index(node)
        removed = self._children.pop(index)
        removed.parent = None

    def clear(self):
        """
        Reset Node. Remove (and detach) all children and clear all attributes.
        """
        for child in self._children:
            child.parent = None
        self.attrib.clear()
        self._children = []

    def get(self, key, default=None):
        """
        Get attribute of node or default.
        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """
        Set attribute of node.
        """
        self.attrib[key] = value

    def keys(self):
        """
        Get list of attribute names.
        """
        # Build a real list: on Python 3 `dict.keys()` is a view.
        return list(self.attrib.keys())

    def items(self):
        """
        Get element attributes as a list of (name, value) pairs.
        """
        # Build a real list: on Python 3 `dict.items()` is a view.
        return list(self.attrib.items())

    def add_class(self, value):
        """
        Add a class name to the `class` attribute.

        Names already present are not added a second time.
        """
        classes = self.get('class', '').split()
        for name in value.split():
            if name not in classes:
                classes.append(name)
        self.set('class', ' '.join(classes))

    def remove_class(self, value):
        """
        Remove a class name from the `class` attribute.

        Every occurrence of the name is removed. The attribute is left
        untouched when the name is not present.
        """
        classes = self.get('class', '').split()
        if value in classes:
            remaining = [name for name in classes if name != value]
            self.set('class', ' '.join(remaining))

    def iter(self, tags=None):
        """
        Return a tree iterator of this node and all children in document order.

        `tags` is a sequence of tag names of nodes which will be returned;
        a single tag name given as a string is accepted too.
        If `tags` is empty (the default), all nodes will be returned,
        including text and comment nodes.
        """
        if not tags:
            tags = []
        elif isinstance(tags, (text_type, bytes)):
            # A bare string would otherwise be used as a substring filter.
            tags = [tags]
        if len(tags) == 0 or self.tag in tags:
            yield self
        for child in self._children:
            if is_element(child):
                for descendant in child.iter(tags):
                    yield descendant
            elif len(tags) == 0:
                # Text and comment nodes have no tag and no children.
                yield child

    def itertext(self, raw=False):
        """
        Return a tree iterator of all children text nodes in document order.

        Set `raw` to `True` to include RawText nodes (at any depth).
        """
        for child in self:
            if is_raw_text(child) and not raw:
                continue
            if is_text(child):
                yield child
            elif is_element(child):
                for descendant in child.itertext(raw=raw):
                    yield descendant
# --------------------------------------------------------------------
# Serialization
def _raise_serialization_error(text):
    """
    Raise a TypeError reporting that `text` cannot be serialized.

    The message includes the repr of the offending object and the name
    of its type.
    """
    message = 'cannot serialize {0!r} (type {1})'.format(text, type(text).__name__)
    raise TypeError(message)
def _newline_required(node, start=False):
    """
    Tell whether the serializer should emit a newline after a tag of `node`.

    With `start` set to True the question is asked for the start tag,
    otherwise for the end tag (or for the only tag of an empty element).
    The tag name is compared in lower case.
    """
    tag = node.tag.lower()
    if start:
        # After a start tag: only for non-empty block-level elements other
        # than `p` which actually have at least one child.
        return (
            tag in HTML_BLOCK and
            tag not in HTML_EMPTY and
            not (tag in ['p', 'P']) and
            not (len(node) < 1)
        )
    # After an end tag: every block-level element, every `br`, and any
    # `img` that is not a direct child of a paragraph.
    return (
        tag in HTML_BLOCK or
        tag == 'br' or
        (tag == 'img' and (node.parent is None or node.parent.tag not in ['p', 'P']))
    )
def _escape_cdata(text):
    """
    Escape character data: replace `&`, `<` and `>` with HTML entities.

    Objects that do not behave like strings are reported through
    `_raise_serialization_error`.
    """
    try:
        # `&` goes first so the entities inserted by the later replacements
        # are not escaped a second time.
        for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
            # The membership test avoids do-nothing replace calls, which is
            # worthwhile for the short strings that are the common case.
            if char in text:
                text = text.replace(char, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
def _escape_attrib(text):
    """
    Escape an attribute value: replace `&`, `<`, `>` and `"` with HTML
    entities.

    Objects that do not behave like strings are reported through
    `_raise_serialization_error`.
    """
    replacements = (
        ('&', '&amp;'),  # first, so later entities are not re-escaped
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
    )
    try:
        for char, entity in replacements:
            if char in text:
                text = text.replace(char, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
def _serialize_node(write, node, format):
    """
    Serialize `node` and, recursively, its children.

    `write` is called with each string fragment of the output, in order.
    `format` is compared against 'html' and 'xhtml' to choose the notation
    of boolean attributes and of empty elements. Objects that are not
    nodes are reported through `_raise_serialization_error`.
    """
    # Leaf nodes first. Comments are checked before text, and raw text
    # before plain text because RawText is a Text subclass.
    if is_comment(node):
        write('<!-- {0} -->'.format(_escape_cdata(node)))
        return
    if is_raw_text(node):
        write(node)
        return
    if is_text(node):
        write(_escape_cdata(node))
        return
    if not is_node(node):
        return _raise_serialization_error(node)

    tag = node.tag
    if tag is None:
        # A tag-less element contributes only its children.
        for child in node:
            _serialize_node(write, child, format)
        return

    write('<{0}'.format(tag))
    for name, value in sorted(node.items()):  # lexical order
        value = _escape_attrib(value)
        if format == 'html' and name == value:
            # handle boolean attributes
            write(' {0}'.format(value))
        else:
            write(' {0}="{1}"'.format(name, value))
    closing = ' />' if format == 'xhtml' and tag.lower() in HTML_EMPTY else '>'
    write(closing)
    if _newline_required(node, start=True):
        write('\n')
    if tag.lower() not in HTML_EMPTY:
        # Empty elements get neither content nor an end tag.
        for child in node:
            _serialize_node(write, child, format)
        write('</{0}>'.format(tag))
    if _newline_required(node):
        write('\n')
def to_string(node, format='html'):
    """
    Serialize a node together with its children and return the result as
    a single string.

    `format` may be one of "html" or "xhtml".
    """
    chunks = []
    # The serializer pushes each output fragment onto the list.
    _serialize_node(chunks.append, node, format)
    return "".join(chunks)
#!/bin/sh
# Run the test suite under coverage, then print a coverage report for
# htree.py (including missed lines), then lint both files with flake8.
# Each step runs only if the previous one succeeded.
coverage run test_htree.py \
    && coverage report -m --include=htree.py \
    && flake8 --max-line-length=119 htree.py test_htree.py
import unittest
import textwrap
import htree
def dedent(text):
    """
    Remove the common leading whitespace from every line of `text`, then
    drop any newlines at the very start of the result.
    """
    unindented = textwrap.dedent(text)
    return unindented.lstrip('\n')
class TestTypes(unittest.TestCase):
    """Tests for the node classes and the `is_*` type predicates."""

    def check_predicates(self, obj, accepted):
        """
        Assert that exactly the predicates named in `accepted` hold for
        `obj` and that every other predicate rejects it.
        """
        outcomes = [
            ('is_node', htree.is_node(obj)),
            ('is_text', htree.is_text(obj)),
            ('is_text_strict', htree.is_text(obj, strict=True)),
            ('is_atomic_text', htree.is_atomic_text(obj)),
            ('is_raw_text', htree.is_raw_text(obj)),
            ('is_comment', htree.is_comment(obj)),
            ('is_element', htree.is_element(obj)),
        ]
        for label, outcome in outcomes:
            if label in accepted:
                self.assertTrue(outcome, label)
            else:
                self.assertFalse(outcome, label)

    def check_node(self, node, class_name, accepted):
        """
        Run the predicate checks, then verify the repr prefix and that the
        freshly created node has no parent.
        """
        self.check_predicates(node, accepted)
        self.assertTrue(repr(node).startswith('<{0} node at '.format(class_name)))
        self.assertIsNone(node.parent)

    def test_Text_type(self):
        self.check_node(
            htree.Text('some text'), 'Text',
            ['is_node', 'is_text', 'is_text_strict']
        )

    def test_RawText_type(self):
        self.check_node(
            htree.RawText('some text'), 'RawText',
            ['is_node', 'is_text', 'is_raw_text']
        )

    def test_AtomicText_type(self):
        self.check_node(
            htree.AtomicText('some text'), 'AtomicText',
            ['is_node', 'is_text', 'is_atomic_text']
        )

    def test_Comment_type(self):
        self.check_node(
            htree.Comment('some text'), 'Comment',
            ['is_node', 'is_comment']
        )

    def test_Element_type(self):
        element = htree.Element()
        self.check_node(element, 'Element', ['is_node', 'is_element'])
        self.assertIsNone(element.tag)

    def test_non_node(self):
        # A plain string is rejected by every predicate.
        self.check_predicates('not a node', [])
class TestElement(unittest.TestCase):
    """
    Tests for the container and attribute API of `htree.Element`.

    Attribute names and pairs are compared in sorted order, so the tests
    depend neither on dictionary ordering nor on whether `keys()` and
    `items()` return lists or dictionary views.
    """

    def test_Element_init(self):
        node = htree.Element('p', id='foo')
        self.assertTrue(htree.is_element(node))
        self.assertEqual(node.tag, 'p')
        self.assertEqual(sorted(node.items()), [('id', 'foo')])
        self.assertEqual(node[:], [])

    def test_Element_copy(self):
        node = htree.Element('p', id='foo')
        node.append(htree.Text('some text'))
        copy = node.copy()
        self.assertIsNot(node, copy)
        self.assertTrue(htree.is_element(node))
        self.assertTrue(htree.is_element(copy))
        self.assertEqual(node.tag, copy.tag)
        self.assertEqual(sorted(node.items()), sorted(copy.items()))
        self.assertEqual(node[:], copy[:])
        # The attribute dictionaries must be independent.
        copy.set('class', 'bar')
        self.assertNotEqual(sorted(node.items()), sorted(copy.items()))
        # TODO: test changes to children

    def test_Element_len(self):
        node = htree.Element('p')
        self.assertEqual(len(node), 0)
        node.append(htree.Text('some text'))
        self.assertEqual(len(node), 1)
        node.append(htree.Element('br'))
        self.assertEqual(len(node), 2)
        node.append(htree.Text('more text'))
        self.assertEqual(len(node), 3)

    def test_Element_getter(self):
        node = htree.Element('p')
        text = htree.Text('some text')
        node.append(text)
        self.assertIs(node[0], text)

    def test_Element_setter(self):
        node = htree.Element('p')
        text = htree.Text('some text')
        othertext = htree.Text('other text')
        node.append(text)
        self.assertEqual(len(node), 1)
        node[0] = othertext
        self.assertEqual(len(node), 1)
        self.assertEqual(node[:], [othertext])
        self.assertIs(othertext.parent, node)
        self.assertNotEqual(node[0], text)

    def test_Element_setter_slice(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        text2 = htree.Text('text2')
        node.extend([text1, text2])
        self.assertEqual(len(node), 2)
        text3 = htree.Text('text3')
        text4 = htree.Text('text4')
        node[1:2] = [text3, text4]
        self.assertEqual(len(node), 3)
        self.assertEqual(node[:], [text1, text3, text4])

    def test_Element_setter_errors(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        with self.assertRaises(IndexError):
            node[0] = text1
        with self.assertRaises(TypeError):
            node[0] = None
        emptynode = htree.Element('br')
        with self.assertRaises(TypeError):
            emptynode[0] = text1

    def test_Element_deleter(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        text2 = htree.Text('text2')
        node.extend([text1, text2])
        self.assertEqual(len(node), 2)
        del node[1]
        self.assertEqual(len(node), 1)
        self.assertIsNone(text2.parent)
        self.assertIs(text1.parent, node)

    def test_Element_append(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        node.append(text1)
        self.assertEqual(len(node), 1)
        self.assertIs(text1.parent, node)
        text2 = htree.Text('text2')
        node.append(text2)
        self.assertEqual(len(node), 2)
        self.assertIs(text2.parent, node)
        self.assertEqual(node[:], [text1, text2])

    def test_Element_append_errors(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        with self.assertRaises(TypeError):
            node.append(None)
        emptynode = htree.Element('br')
        with self.assertRaises(TypeError):
            emptynode.append(text1)

    def test_Element_extend(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        text2 = htree.Text('text2')
        self.assertEqual(len(node), 0)
        node.extend([text1, text2])
        self.assertEqual(len(node), 2)
        self.assertIs(text1.parent, node)
        self.assertIs(text2.parent, node)
        self.assertEqual(node[:], [text1, text2])

    def test_Element_extend_errors(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        with self.assertRaises(TypeError):
            node.extend([None])
        emptynode = htree.Element('br')
        with self.assertRaises(TypeError):
            emptynode.extend([text1])

    def test_Element_insert(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        text2 = htree.Text('text2')
        text3 = htree.Text('text3')
        self.assertEqual(len(node), 0)
        node.insert(0, text1)
        self.assertEqual(len(node), 1)
        node.insert(1, text2)
        self.assertEqual(len(node), 2)
        node.insert(1, text3)
        self.assertEqual(len(node), 3)
        self.assertEqual(node[:], [text1, text3, text2])

    def test_Element_insert_errors(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        # Always pass an index: with a single argument the TypeError would
        # come from the missing parameter and `insert` itself would never
        # be exercised.
        with self.assertRaises(TypeError):
            node.insert(0, None)
        self.assertEqual(len(node), 0)
        emptynode = htree.Element('br')
        with self.assertRaises(TypeError):
            emptynode.insert(0, text1)
        self.assertEqual(len(emptynode), 0)

    def test_Element_remove(self):
        node = htree.Element('p')
        text1 = htree.Text('text1')
        text2 = htree.Text('text2')
        node.extend([text1, text2])
        self.assertEqual(len(node), 2)
        node.remove(text1)
        self.assertEqual(len(node), 1)
        self.assertIsNone(text1.parent)
        self.assertEqual(node[:], [text2])

    def test_Element_clear(self):
        node = htree.Element('p', id='foo')
        node.append(htree.Text('some text'))
        node.clear()
        self.assertEqual(node.tag, 'p')
        self.assertEqual(list(node.items()), [])
        self.assertEqual(node[:], [])

    def test_Element_attrib_get(self):
        node = htree.Element('p', id='foo')
        self.assertEqual(node.get('id'), 'foo')
        self.assertIsNone(node.get('missing'))
        self.assertEqual(node.get('missing', 'default'), 'default')

    def test_Element_attrib_set(self):
        node = htree.Element('p', id='foo')
        node.set('id', 'bar')
        node.set('class', 'baz')
        self.assertEqual(sorted(node.items()), [('class', 'baz'), ('id', 'bar')])

    def test_Element_attrib_keys(self):
        node = htree.Element('p', **{'id': 'foo', 'class': 'baz'})
        self.assertEqual(sorted(node.keys()), ['class', 'id'])

    def test_Element_attrib_items(self):
        node = htree.Element('p', **{'id': 'foo', 'class': 'baz'})
        self.assertEqual(sorted(node.items()), [('class', 'baz'), ('id', 'foo')])

    def test_Element_attrib_class(self):
        node = htree.Element('p')
        self.assertIsNone(node.get('class'))
        node.add_class('foo')
        self.assertEqual(node.get('class'), 'foo')
        node.add_class('bar')
        self.assertEqual(node.get('class'), 'foo bar')
        node.add_class('baz')
        self.assertEqual(node.get('class'), 'foo bar baz')
        node.remove_class('bar')
        self.assertEqual(node.get('class'), 'foo baz')
        node.remove_class('missing')
        self.assertEqual(node.get('class'), 'foo baz')

    def test_Element_iter(self):
        p = htree.Element('p')
        em = htree.Element('em')
        strong = htree.Element('strong')
        a1 = htree.Element('a')
        a2 = htree.Element('a')
        p.append(em)
        em.append(strong)
        strong.append(a1)
        p.append(a2)
        self.assertEqual(list(p.iter()), [p, em, strong, a1, a2])
        self.assertEqual(list(p.iter('em')), [em])
        self.assertEqual(list(p.iter('a')), [a1, a2])
        self.assertEqual(list(p.iter('br')), [])

    def test_Element_itertext(self):
        p = htree.Element('p')
        ptext = htree.RawText('ptext')
        em = htree.Element('em')
        emtext = htree.Text('emtext')
        strong = htree.Element('strong')
        strongtext = htree.Text('strongtext')
        a1 = htree.Element('a')
        a1text = htree.Text('a1text')
        a2 = htree.Element('a')
        a2text = htree.Text('a2text')
        p.append(ptext)
        p.append(em)
        em.append(emtext)
        em.append(strong)
        strong.append(strongtext)
        strong.append(a1)
        a1.append(a1text)
        p.append(a2)
        a2.append(a2text)
        self.assertEqual(list(p.itertext()), [emtext, strongtext, a1text, a2text])
        self.assertEqual(
            list(p.itertext(raw=True)),
            [ptext, emtext, strongtext, a1text, a2text]
        )
class TestSerializer(unittest.TestCase):
    """Tests for `htree.to_string` in both the "html" and "xhtml" formats."""

    def check(self, node, html, xhtml=None):
        """
        Assert that `node` serializes to `html` with the default format and
        to `xhtml` with `format='xhtml'`. When `xhtml` is omitted both
        formats are expected to produce `html`.
        """
        if xhtml is None:
            xhtml = html
        self.assertEqual(htree.to_string(node), html)
        self.assertEqual(htree.to_string(node, format='xhtml'), xhtml)

    def test_Text_to_string(self):
        self.check(htree.Text('some text'), 'some text')

    def test_AtomicText_to_string(self):
        self.check(htree.AtomicText('some text'), 'some text')

    def test_RawText_to_string(self):
        self.check(htree.RawText('some text'), 'some text')

    def test_Comment_to_string(self):
        self.check(htree.Comment('some text'), '<!-- some text -->')

    def test_Text_escape_to_string(self):
        self.check(htree.Text('text & <tag>'), 'text &amp; &lt;tag&gt;')

    def test_AtomicText_escape_to_string(self):
        self.check(htree.AtomicText('"text" & <tag>'), '"text" &amp; &lt;tag&gt;')

    def test_RawText_escape_to_string(self):
        # Raw text must come out untouched.
        self.check(htree.RawText('"text" & <tag>'), '"text" & <tag>')

    def test_Comment_escape_to_string(self):
        self.check(
            htree.Comment('"text" & <tag>'),
            '<!-- "text" &amp; &lt;tag&gt; -->'
        )

    def test_Element_empty_tag_is_None_to_string(self):
        self.check(htree.Element(), '')

    def test_Element_empty_tag_is_empty_to_string(self):
        self.check(htree.Element('br'), '<br>\n', '<br />\n')

    def test_Element_empty_tag_not_empty_to_string(self):
        self.check(htree.Element('p'), '<p></p>\n')

    def test_Element_with_text_tag_is_None_to_string(self):
        root = htree.Element()
        root.append(htree.Text('some text'))
        self.check(root, 'some text')

    def test_Element_with_child_tag_is_None_to_string(self):
        root = htree.Element()
        root.append(htree.Element('p'))
        self.check(root, '<p></p>\n')

    def test_Element_with_children_tag_is_None_to_string(self):
        root = htree.Element()
        root.extend([htree.Element('p'), htree.Element('br'), htree.Element('hr')])
        html = dedent(
            '''
            <p></p>
            <br>
            <hr>
            '''
        )
        xhtml = dedent(
            '''
            <p></p>
            <br />
            <hr />
            '''
        )
        self.check(root, html, xhtml)

    def test_Element_with_text_to_string(self):
        para = htree.Element('p')
        para.append(htree.Text('some text'))
        self.check(para, '<p>some text</p>\n')

    def test_Element_with_child_to_string(self):
        outer = htree.Element('div')
        outer.append(htree.Element('p'))
        expected = dedent(
            '''
            <div>
            <p></p>
            </div>
            '''
        )
        self.check(outer, expected)

    def test_Element_with_children_to_string(self):
        outer = htree.Element('div')
        outer.extend([htree.Element('div'), htree.Element('hr'), htree.Element('img')])
        html = dedent(
            '''
            <div>
            <div></div>
            <hr>
            <img>
            </div>
            '''
        )
        xhtml = dedent(
            '''
            <div>
            <div></div>
            <hr />
            <img />
            </div>
            '''
        )
        self.check(outer, html, xhtml)

    def test_Element_with_nested_children_to_string(self):
        em = htree.Element('em')
        em.append(htree.Text('with emphasis'))
        p = htree.Element('p')
        p.extend([htree.Text('Some text '), em, htree.Text('.')])
        div = htree.Element('div')
        div.append(p)
        expected = dedent(
            '''
            <div>
            <p>Some text <em>with emphasis</em>.</p>
            </div>
            '''
        )
        self.check(div, expected)

    def test_Element_with_attr_to_string(self):
        p = htree.Element('p', id='foo')
        p.append(htree.Element('img', src='example.jpg', alt='An image.'))
        html = dedent(
            '''
            <p id="foo"><img alt="An image." src="example.jpg"></p>
            '''
        )
        xhtml = dedent(
            '''
            <p id="foo"><img alt="An image." src="example.jpg" /></p>
            '''
        )
        self.check(p, html, xhtml)

    def test_Element_with_bool_attr_to_string(self):
        inpt = htree.Element('input', checked='checked', disabled='disabled')
        inpt.set('name', 'foo')
        inpt.set('type', 'checkbox')
        self.check(
            inpt,
            '<input checked disabled name="foo" type="checkbox">',
            '<input checked="checked" disabled="disabled" name="foo" type="checkbox" />'
        )

    def test_Element_with_attr_escape_to_string(self):
        img = htree.Element('img', src='example.jpg', alt='"text" & <tag>')
        html = dedent(
            '''
            <img alt="&quot;text&quot; &amp; &lt;tag&gt;" src="example.jpg">
            '''
        )
        xhtml = dedent(
            '''
            <img alt="&quot;text&quot; &amp; &lt;tag&gt;" src="example.jpg" />
            '''
        )
        self.check(img, html, xhtml)

    def test_Element_with_invalid_attr_to_string(self):
        p = htree.Element('p', id=None)
        with self.assertRaises(TypeError):
            htree.to_string(p)

    def test_invalid_node_to_string(self):
        with self.assertRaises(TypeError):
            htree.to_string(None)

    def test_escape_cdata_invalid(self):
        with self.assertRaises(TypeError):
            htree._escape_cdata(None)
# Run the whole test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment