Skip to content

Instantly share code, notes, and snippets.

@mdaniel
Created February 17, 2014 06:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mdaniel/9045814 to your computer and use it in GitHub Desktop.
Save mdaniel/9045814 to your computer and use it in GitHub Desktop.
Use PyQt to visualize what BeautifulSoup4 sees when it ingests HTML; the jsoup flavor of this tool actually allows one to run queries, so hopefully this one will, too (any minute now).
#! /usr/bin/env python
# -*- coding: utf-8 -*-
__docformat__ = 'reStructuredText'
import sys
from bs4 import BeautifulSoup, NavigableString, Tag
from PyQt5.Qt import (
QApplication, QMainWindow, QMenuBar, QMenu, QTreeWidget, QTreeWidgetItem)
class MyMain(QMainWindow):
    """
    Main window showing the BeautifulSoup parse tree of an HTML file.

    The file named by ``sys.argv[1]`` is read, decoded as UTF-8, parsed with
    ``BeautifulSoup`` and rendered in a single-column ``QTreeWidget`` under
    one top-level item labelled ``Root``.
    """

    def __init__(self, parent=None):
        """
        Build the menu bar, parse the document and populate the tree.

        :param parent: optional parent widget handed to ``QMainWindow``.
        :raises IndexError: if ``sys.argv`` carries no file name.
        """
        super(MyMain, self).__init__(parent)

        mb = QMenuBar(self)
        mb.setNativeMenuBar(False)
        qm = QMenu('File', self)
        qm.addAction('Open')
        # TODO: the File menu is built but not attached yet; call
        # ``mb.addMenu(qm)`` once the Open action has a handler.
        self.setMenuBar(mb)

        tw = QTreeWidget(self)
        tw.setColumnCount(1)
        tw.setHeaderLabel('Elements')

        html_fn = sys.argv[1]
        # Read raw bytes and decode them explicitly: a text-mode ``read()``
        # returns ``str`` on Python 3, which has no ``decode`` method, so the
        # previous ``open(html_fn)`` + ``.decode()`` only worked on Python 2.
        with open(html_fn, 'rb') as fh:
            markup = fh.read().decode('utf-8')
        # No parser is named here, so the choice is left to BeautifulSoup.
        soup = BeautifulSoup(markup)

        twi = QTreeWidgetItem(None, [u'Root'])
        self._populate(twi, soup)
        tw.insertTopLevelItems(0, [twi])
        self.setCentralWidget(tw)

    @staticmethod
    def _label_for(node):
        """
        Return the text shown in the tree for one parse-tree node.

        :param node: a ``NavigableString`` or ``Tag`` taken from ``.children``.
        :return: ``repr`` of the text for strings, ``Hidden<...>`` for hidden
            tags, otherwise the (namespace-qualified) tag name, wrapped as
            ``<name attr=value ...>`` when the tag has attributes.
        """
        if isinstance(node, NavigableString):
            # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on
            # Python 3, where the ``unicode`` builtin no longer exists.
            text_type = type(u'')
            return repr(text_type(node))
        if node.hidden:
            return 'Hidden<%r>' % (node,)

        if node.namespace:
            label = '{%s}%s' % (node.namespace, node.name)
        else:
            label = node.name
        if node.attrs:
            pairs = ' '.join('%s=%s' % it for it in node.attrs.items())
            label = '<%s %s>' % (label, pairs)
        return label

    def _populate(self, p, t):
        """
        Recursively mirror the children of ``t`` as child items of ``p``.

        :type p: QTreeWidgetItem
        :type t: Tag
        """
        for t_ch in t.children:
            ch_twi = QTreeWidgetItem(None, [self._label_for(t_ch)])
            # Strings are leaves; every kind of tag (hidden ones included)
            # is descended into.
            if not isinstance(t_ch, NavigableString):
                self._populate(ch_twi, t_ch)
            p.addChild(ch_twi)
def main(argv):
    """
    Start the Qt application and show the parse-tree window.

    :param argv: argument list handed to ``QApplication``.
    :return: the value returned by ``QApplication.exec_()``, or ``2`` when
        no HTML file was named on the command line.
    """
    # ``MyMain`` reads the document path from ``sys.argv[1]`` itself, so check
    # that list here and fail with a usage line instead of an IndexError
    # traceback raised from inside the widget constructor.
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else 'soup_tree'
        sys.stderr.write('usage: %s HTML_FILE\n' % prog)
        return 2

    app = QApplication(argv)
    window = MyMain()
    window.show()
    # Hand the event loop's result back to the caller rather than dropping it.
    return app.exec_()
if __name__ == '__main__':
    # Propagate whatever ``main`` returns as the process exit status; a
    # ``None`` result makes ``sys.exit`` exit with status 0.
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment