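Demonstration of why .iterdescendants() is the wrong choice for Stack Overflow question 6123351:
serializing every descendant emits each nested element twice (once inside its parent and once
on its own), whereas .iterchildren() yields only the direct children, so nothing is duplicated.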
>>> import lxml.html
>>> h = lxml.html.fromstring('<html><body><p>one <b>two</b></p><P>three <b>four</b></p></body></html>')
>>> lxml.html.tostring(h)
'<html><body><p>one <b>two</b></p><p>three <b>four</b></p></body></html>'
>>> ''.join([lxml.html.tostring(c) for c in h.body.iterdescendants()])
'<p>one <b>two</b></p><b>two</b><p>three <b>four</b></p><b>four</b>'
>>> ''.join([lxml.html.tostring(c) for c in h.body.iterchildren()])
'<p>one <b>two</b></p><p>three <b>four</b></p>'
