Skip to content

Instantly share code, notes, and snippets.

@mdaniel
Created February 17, 2014 06:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mdaniel/9045850 to your computer and use it in GitHub Desktop.
Save mdaniel/9045850 to your computer and use it in GitHub Desktop.
Visualize and run some queries on the tree that JSoup produces
package org.jsoup.examples;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import javax.swing.*;
import javax.swing.tree.*;
import java.awt.*;
import java.awt.event.*;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import static java.lang.String.format;
public class JSoupFrame extends JFrame
{
File lastChooserDir;
JTree tree;
private static final Logger LOG = Logger.getLogger(JSoupFrame.class.getName());
/**
 * Application entry point. Sets two Apple menu-bar system properties to
 * {@code "true"}, then creates and shows a {@link JSoupFrame} on the event
 * dispatch thread, blocking until that has happened.
 *
 * @param args command-line arguments; not read
 * @throws Exception if the blocking hand-off to the event dispatch thread fails
 */
public static void main(String[] args) throws Exception {
    final String[] menuBarProperties = {
        "com.apple.macos.useScreenMenuBar",
        "apple.laf.useScreenMenuBar",
    };
    for (String property : menuBarProperties) {
        System.setProperty(property, "true");
    }
    final Runnable showFrame = new Runnable() {
        @Override
        public void run() {
            new JSoupFrame().setVisible(true);
        }
    };
    SwingUtilities.invokeAndWait(showFrame);
}
/**
 * Builds the frame: a 640x480 minimum/preferred size, an (initially unshown)
 * {@link JTree}, a menu bar with "File &gt; Open" and "Select &gt; CSS", and
 * an "Open a File" label that opens the file chooser when clicked.
 *
 * <p>The menu accelerators use the platform's menu shortcut modifier (as
 * reported by {@link Toolkit#getMenuShortcutKeyMask()}) instead of a
 * hard-coded Meta key, so the shortcuts are usable on platforms whose menu
 * modifier is not Meta.
 */
public JSoupFrame() {
    lastChooserDir = new File(System.getProperty("user.dir"));
    setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
    final Dimension dim = new Dimension(640, 480);
    setPreferredSize(dim);
    setMinimumSize(dim);
    tree = new JTree();

    // Platform-appropriate modifier for menu accelerators.
    final int shortcutMask = Toolkit.getDefaultToolkit().getMenuShortcutKeyMask();

    final JMenuBar jmb = new JMenuBar();
    setJMenuBar(jmb);
    final JMenu fileMenu = new JMenu("File");
    jmb.add(fileMenu);
    final JMenu selectMenu = new JMenu("Select");
    jmb.add(selectMenu);

    final JMenuItem openItem = new JMenuItem("Open");
    openItem.setMnemonic('O');
    openItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_O, shortcutMask));
    fileMenu.add(openItem);

    final JMenuItem cssMI = new JMenuItem("CSS");
    cssMI.setMnemonic('S');
    cssMI.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_S, shortcutMask));
    selectMenu.add(cssMI);

    // One listener instance serves both the menu item and the label click.
    final ActionListener openAction = new OpenFileAction();
    openItem.addActionListener(openAction);

    cssMI.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent ae) {
            final String selector =
                JOptionPane.showInputDialog(JSoupFrame.this, "CSS Selector?", "");
            // A null result means the dialog was dismissed without a value.
            if (null != selector && ! selector.isEmpty()) {
                runSelection(selector);
            }
        }
    });

    final JLabel openLabel = new JLabel("Open a File");
    openLabel.addMouseListener(new MouseAdapter() {
        @Override
        public void mouseClicked(MouseEvent e) {
            // The action does not read its event argument, so null is passed.
            openAction.actionPerformed(null);
        }
    });
    getContentPane().add(openLabel);
}
/**
 * Action that shows a files-only chooser rooted at {@code lastChooserDir}
 * and, when the user approves a selection, remembers the selected file's
 * parent directory and hands the file to {@code runFile}.
 */
private class OpenFileAction implements ActionListener
{
    /**
     * Shows the chooser and opens the approved file, if any.
     *
     * @param ae the triggering event; not read by this method
     */
    @Override
    public void actionPerformed(ActionEvent ae) {
        final JFileChooser chooser = new JFileChooser(lastChooserDir);
        chooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
        final int answer = chooser.showOpenDialog(JSoupFrame.this);
        if (answer != JFileChooser.APPROVE_OPTION) {
            // Anything other than approval: leave state untouched.
            return;
        }
        final File chosen = chooser.getSelectedFile();
        lastChooserDir = chosen.getParentFile();
        runFile(chosen);
    }
}
/**
 * Runs a CSS selector against the document currently shown in the tree,
 * prints one slash-separated path per match to {@code System.err}, and asks
 * the tree to make every matching path visible.
 *
 * <p>If the tree is not currently backed by a {@link SoupTreeModel} (for
 * example, no file has been loaded yet) the request is logged and ignored
 * rather than failing with a {@code ClassCastException}.
 *
 * @param selector the CSS query passed to {@code SoupTreeModel.select}
 */
public void runSelection(String selector) {
    final TreeModel model = tree.getModel();
    if (!(model instanceof SoupTreeModel)) {
        LOG.info(format("No document loaded; ignoring selector <%s>", selector));
        return;
    }
    final SoupTreeModel soupModel = (SoupTreeModel) model;
    final TreePath[] paths = TreeHelpers.selectorToPath(soupModel.select(selector));
    for (TreePath tp : paths) {
        System.err.println("--Path");
        // Reset per path so that only separators *between* segments are printed.
        boolean first = true;
        for (Object o : tp.getPath()) {
            final Node n = ((SoupTreeNode) o).node();
            if (!first) {
                System.err.print("/");
            }
            System.err.printf("<%s%s>", n.nodeName(), describeNode(n));
            first = false;
        }
        System.err.printf("%n--Path--%n");
    }
    for (TreePath tp : paths) {
        tree.makeVisible(tp);
    }
}

/**
 * Builds the qualifier printed after a node's name: the first applicable of
 * {@code #id}, {@code [itemtype="..."]}, {@code [itemprop="..."]}, the first
 * CSS class, or a 1-based position among preceding siblings with the same
 * node name.
 */
private static String describeNode(Node n) {
    final StringBuilder sb = new StringBuilder();
    if (n.hasAttr("id")) {
        sb.append(' ').append("#").append(n.attr("id"));
    } else if (n.hasAttr("itemtype")) {
        sb.append("[itemtype=\"").append(n.attr("itemtype")).append("\"]");
    } else if (n.hasAttr("itemprop")) {
        sb.append("[itemprop=\"").append(n.attr("itemprop")).append("\"]");
    } else if (n.hasAttr("class")) {
        // Trim first so leading whitespace cannot yield an empty first token.
        final String cssClass = n.attr("class").trim();
        if (!cssClass.isEmpty()) {
            sb.append(' ').append(".").append(cssClass.split("\\s+")[0]);
        }
    } else {
        // Count every preceding sibling with the same name, including the
        // immediately preceding one.
        int count = 1;
        for (Node sib = n.previousSibling(); null != sib; sib = sib.previousSibling()) {
            if (sib.nodeName().equals(n.nodeName())) {
                count++;
            }
        }
        sb.append('[').append(count).append(']');
    }
    return sb.toString();
}
/**
 * Loads and displays {@code f}, delegating to
 * {@code runFile(File, String)} with a {@code null} charset.
 *
 * @param f the file to parse and display
 */
public void runFile(File f) {
    // The cast keeps the null argument typed as the String charset parameter.
    runFile(f, (String) null);
}
/**
 * Loads and displays {@code f} via {@code runFileIOE}. If that throws an
 * {@link IOException}, the failure is logged at {@code SEVERE} and the
 * content pane is replaced with a label describing the exception.
 *
 * @param f       the file to parse and display
 * @param charset charset name forwarded to {@code runFileIOE}; may be
 *                {@code null} (the single-argument overload passes null)
 */
public void runFile(File f, String charset) {
    try {
        runFileIOE(f, charset);
    } catch (IOException ex) {
        LOG.log(Level.SEVERE, "Kaboom", ex);
        final Container contentPane = getContentPane();
        contentPane.removeAll();
        contentPane.add(new JLabel("Kaboom: "+ex));
        // The pane is already on screen: lay out the new child and repaint so
        // the error label actually becomes visible.
        contentPane.validate();
        contentPane.repaint();
    }
}
/**
 * Parses {@code f} with jsoup and replaces the content pane with a tabbed
 * view: a "tree" tab showing the parsed document in the shared {@link JTree}
 * and a "source" tab showing the document's pretty-printed output. Finally
 * runs the built-in {@code .meet-business-owner} selection.
 *
 * @param f       the file to parse
 * @param charset charset name passed straight to {@code Jsoup.parse}; callers
 *                in this class may pass {@code null}
 * @throws IOException if {@code Jsoup.parse} fails to read the file
 */
public void runFileIOE(File f, String charset) throws IOException {
    final Container contentPane = getContentPane();
    final Document doc = Jsoup.parse(f, charset);
    doc.outputSettings(
        doc.outputSettings()
            .prettyPrint(true)
            .indentAmount(2)
            .syntax(Document.OutputSettings.Syntax.xml));
    contentPane.removeAll();

    final JTabbedPane jtp = new JTabbedPane();
    final SoupTreeModel soupModel = new SoupTreeModel(doc);
    tree.setModel(soupModel);
    tree.setExpandsSelectedPaths(true);
    jtp.add("tree",
        new JScrollPane(tree
            , ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS
            , ScrollPaneConstants.HORIZONTAL_SCROLLBAR_ALWAYS));

    final String html = doc.toString();
    final JTextArea jta = new JTextArea(html);
    jta.setFont(Font.decode("Courier New-PLAIN-9"));
    jta.setEditable(false);
    jta.setLineWrap(true);
    // Wrap the text area in a scroll pane; otherwise any output taller than
    // the tab cannot be reached.
    jtp.add("source", new JScrollPane(jta));

    contentPane.add(jtp);
    // Lay out and repaint the freshly populated pane.
    contentPane.validate();
    contentPane.repaint();

    // NOTE(review): this selector is hard-coded and looks like a leftover from
    // debugging against one specific page; kept to preserve behaviour.
    final String selector = ".meet-business-owner";
    runSelection(selector);
}
/**
 * {@link javax.swing.tree.TreeModel} view over a jsoup node tree. The root is
 * the {@link SoupTreeNode} for the node given to the constructor, and every
 * structural query is answered from the underlying jsoup {@link Node}.
 */
public static class SoupTreeModel extends DefaultTreeModel
{
    /**
     * @param doc the jsoup node to use as the root of the tree
     */
    public SoupTreeModel(Node doc) {
        super(SoupTreeNode.forNode(doc));
    }

    /**
     * Unwraps any object this model may be handed into its jsoup node.
     *
     * @throws IllegalArgumentException for null or any unsupported type
     */
    private Node objectToNode(Object o) {
        if (o instanceof Node) {
            return (Node)o;
        }
        if (o instanceof SoupTreeNode) {
            return (Node)((SoupTreeNode)o).getUserObject();
        }
        if (o instanceof SoupTreeModel) {
            return objectToNode(((SoupTreeModel) o).root);
        }
        throw new IllegalArgumentException(
            format("Quoi?<%s>", null == o ? "NULL" : o.getClass().getName()));
    }

    /**
     * Runs a CSS query against the root document.
     *
     * @param cssQuery the selector passed to {@code Document.select}
     * @return the tree node for each matching element, in match order
     * @throws ClassCastException if the root node is not a {@link Document}
     */
    public SoupTreeNode[] select(String cssQuery) {
        final Elements elements = ((Document) objectToNode(this.root)).select(cssQuery);
        final SoupTreeNode[] results = new SoupTreeNode[elements.size()];
        int i = 0;
        for (Node el : elements) {
            results[i++] = SoupTreeNode.forNode(el);
        }
        return results;
    }

    @Override
    public boolean isLeaf(Object node) {
        return TreeHelpers.isLeaf(objectToNode(node));
    }

    @Override
    public int getChildCount(Object parent) {
        return TreeHelpers.getChildCount(objectToNode(parent));
    }

    /**
     * Returns the {@link SoupTreeNode} obtained from
     * {@code SoupTreeNode.forNode} for the child at {@code index}. These are
     * the same objects that {@link #select} and
     * {@code TreeHelpers.selectorToPath} use, so paths built from selection
     * results refer to the same node objects the tree displays. (Creating a
     * new wrapper on every call, as before, gave each child a fresh identity
     * per query.)
     */
    @Override
    public Object getChild(Object parent, int index) {
        final Node n = objectToNode(parent);
        return SoupTreeNode.forNode(TreeHelpers.getChild(n, index));
    }

    /**
     * @return the index of {@code child} under {@code parent}, or -1 if
     *         either argument is null or the child is not found
     */
    @Override
    public int getIndexOfChild(Object parent, Object child) {
        if (null == parent || null == child) {
            return -1;
        }
        return TreeHelpers.getIndexOfChild(objectToNode(parent), objectToNode(child));
    }

    /** @return the root's node name followed by its attributes in brackets */
    @Override
    public String toString() {
        final Node self = objectToNode(root);
        return self.nodeName() + "[" + self.attributes() + "]";
    }
}
/**
 * {@link TreeNode} wrapper around a jsoup {@link Node}. Parent, child and
 * leaf queries are answered from the wrapped jsoup node rather than from the
 * child list that {@link DefaultMutableTreeNode} maintains. Instances are
 * obtained through {@link #forNode(Node)}, which reuses a live wrapper for a
 * node when one exists.
 */
public static class SoupTreeNode extends DefaultMutableTreeNode
{
    /**
     * Wrapper cache, guarded by the {@code SoupTreeNode.class} monitor.
     * Values are held through weak references: each wrapper strongly
     * references its key (the jsoup node), so storing wrappers directly as
     * values would keep every key reachable and no entry could be reclaimed.
     */
    private static final Map<Node, java.lang.ref.WeakReference<SoupTreeNode>> cache =
        new WeakHashMap<Node, java.lang.ref.WeakReference<SoupTreeNode>>();

    /**
     * Returns the wrapper for {@code n}, creating and caching one if no live
     * wrapper is currently cached.
     *
     * @param n the jsoup node to wrap
     * @return the cached or newly created wrapper
     */
    public static SoupTreeNode forNode(Node n) {
        synchronized (SoupTreeNode.class) {
            final java.lang.ref.WeakReference<SoupTreeNode> ref = cache.get(n);
            SoupTreeNode result = (null == ref) ? null : ref.get();
            if (null == result) {
                result = new SoupTreeNode(n);
                cache.put(n, new java.lang.ref.WeakReference<SoupTreeNode>(result));
            }
            return result;
        }
    }

    private SoupTreeNode(Node userObject) {
        super(userObject);
    }

    /** @return the wrapped jsoup node */
    private Node node() {
        return (Node)this.userObject;
    }

    /** @return the wrapper for the jsoup parent, or null if there is none */
    @Override
    public TreeNode getParent() {
        final Node parent = node().parent();
        return (null == parent) ? null : forNode(parent);
    }

    @Override
    public boolean isLeaf() {
        return TreeHelpers.isLeaf(node());
    }

    @Override
    public int getChildCount() {
        return TreeHelpers.getChildCount(node());
    }

    @Override
    public TreeNode getChildAt(int index) {
        return forNode(TreeHelpers.getChild(node(), index));
    }

    /**
     * @return the index of {@code aChild} among this node's jsoup children,
     *         or -1 if it is not one of them (including when it is not a
     *         {@code SoupTreeNode} at all)
     * @throws IllegalArgumentException if {@code aChild} is null
     */
    @Override
    public int getIndex(TreeNode aChild) {
        if (null == aChild) {
            throw new IllegalArgumentException("argument is null");
        }
        if (!(aChild instanceof SoupTreeNode)) {
            return -1;
        }
        return TreeHelpers.getIndexOfChild(node(), ((SoupTreeNode) aChild).node());
    }

    /** @return the wrapped node's name */
    @Override
    public String toString() {
        return node().nodeName();
    }
}
/**
 * Static helpers that answer tree-structure questions directly from jsoup
 * {@link Node}s and convert selection results into {@link TreePath}s.
 */
public static class TreeHelpers {
    /**
     * Converts each node into the {@link TreePath} built from its
     * {@code getPath()} result.
     *
     * @param leaves the nodes to convert; null is treated as no nodes
     * @return one path per input node, in the same order (empty for null)
     */
    public static TreePath[] selectorToPath(SoupTreeNode[] leaves) {
        final int total = (null == leaves) ? 0 : leaves.length;
        final TreePath[] paths = new TreePath[total];
        int slot = 0;
        while (slot < total) {
            paths[slot] = new TreePath(leaves[slot].getPath());
            slot++;
        }
        return paths;
    }

    /** @return true when {@code node} has no child nodes */
    public static boolean isLeaf(Node node) {
        return node.childNodeSize() == 0;
    }

    /** @return the number of child nodes of {@code parent} */
    public static int getChildCount(Node parent) {
        return parent.childNodeSize();
    }

    /** @return the child node of {@code parent} at {@code index} */
    public static Node getChild(Node parent, int index) {
        return parent.childNode(index);
    }

    /**
     * Scans the children of {@code parent} in order for one that
     * {@code equals} the given object.
     *
     * @return the index of the first such child, or -1 if there is none
     */
    public static int getIndexOfChild(Node parent, Object child) {
        final int size = parent.childNodeSize();
        int position = 0;
        while (position < size) {
            if (parent.childNode(position).equals(child)) {
                return position;
            }
            position++;
        }
        return -1;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment