Created
February 17, 2014 06:45
-
-
Save mdaniel/9045850 to your computer and use it in GitHub Desktop.
Visualize and run some queries on the tree that JSoup produces
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.jsoup.examples; | |
import org.jsoup.Jsoup; | |
import org.jsoup.nodes.Document; | |
import org.jsoup.nodes.Node; | |
import org.jsoup.select.Elements; | |
import javax.swing.*; | |
import javax.swing.tree.*; | |
import java.awt.*; | |
import java.awt.event.*; | |
import java.io.File; | |
import java.io.IOException; | |
import java.util.Map; | |
import java.util.WeakHashMap; | |
import java.util.logging.Level; | |
import java.util.logging.Logger; | |
import static java.lang.String.format; | |
public class JSoupFrame extends JFrame | |
{ | |
File lastChooserDir; | |
JTree tree; | |
private static final Logger LOG = Logger.getLogger(JSoupFrame.class.getName()); | |
public static void main(String[] args) throws Exception { | |
System.setProperty("com.apple.macos.useScreenMenuBar", "true"); | |
System.setProperty("apple.laf.useScreenMenuBar", "true"); | |
SwingUtilities.invokeAndWait(new Runnable() { | |
public void run() { | |
JSoupFrame f = new JSoupFrame(); | |
f.setVisible(true); | |
} | |
}); | |
} | |
/**
 * Builds the initial 640x480 window: a "File > Open" item, a "Select > CSS" item,
 * and a clickable "Open a File" label as the only content until a file is loaded.
 *
 * <p>Menu accelerators use the platform's menu shortcut modifier instead of a
 * hard-coded {@code meta} key, so they also work on keyboards without a Meta/Command key.
 */
public JSoupFrame() {
    lastChooserDir = new File(System.getProperty("user.dir"));
    setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);

    final Dimension dim = new Dimension(640, 480);
    setPreferredSize(dim);
    setMinimumSize(dim);

    tree = new JTree();

    // Platform shortcut modifier (Command on macOS, Ctrl on most other platforms).
    // getMenuShortcutKeyMask() is deprecated since Java 10 in favour of
    // getMenuShortcutKeyMaskEx(), but it is the variant available on older JDKs.
    final int shortcutMask = Toolkit.getDefaultToolkit().getMenuShortcutKeyMask();

    final JMenuBar jmb = new JMenuBar();
    setJMenuBar(jmb);

    final JMenu fileMenu = new JMenu("File");
    jmb.add(fileMenu);
    final JMenu selectMenu = new JMenu("Select");
    jmb.add(selectMenu);

    final JMenuItem openItem = new JMenuItem("Open");
    openItem.setMnemonic('O');
    openItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_O, shortcutMask));
    fileMenu.add(openItem);

    final JMenuItem cssMI = new JMenuItem("CSS");
    cssMI.setMnemonic('S');
    cssMI.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_S, shortcutMask));
    selectMenu.add(cssMI);

    // One action instance serves both the menu item and the clickable label.
    final ActionListener openAction = new OpenFileAction();
    openItem.addActionListener(openAction);

    cssMI.addActionListener(new ActionListener() {
        public void actionPerformed(ActionEvent ae) {
            // Ask for a selector; a cancelled or empty answer means "do nothing".
            final String selector =
                JOptionPane.showInputDialog(JSoupFrame.this, "CSS Selector?", "");
            if (null != selector && !selector.isEmpty()) {
                runSelection(selector);
            }
        }
    });

    final JLabel openLabel = new JLabel("Open a File");
    openLabel.addMouseListener(new MouseAdapter() {
        @Override
        public void mouseClicked(MouseEvent e) {
            // The action ignores its event argument, so null is sufficient here.
            openAction.actionPerformed(null);
        }
    });
    getContentPane().add(openLabel);
}
private class OpenFileAction implements ActionListener | |
{ | |
public void actionPerformed(ActionEvent ae) { | |
JFileChooser jfc = new JFileChooser(lastChooserDir); | |
jfc.setFileSelectionMode(JFileChooser.FILES_ONLY); | |
if (JFileChooser.APPROVE_OPTION == jfc.showOpenDialog(JSoupFrame.this)) { | |
File f = jfc.getSelectedFile(); | |
lastChooserDir = f.getParentFile(); | |
runFile(f); | |
} | |
} | |
} | |
public void runSelection(String selector) { | |
final SoupTreeModel soupModel = (SoupTreeModel) tree.getModel(); | |
// Document doc = (Document)((SoupTreeNode) soupModel.getRoot()).getUserObject(); | |
// LOG.info(format("sel<%s>:=%s", selector, doc.select(selector).text())); | |
final TreePath[] paths = TreeHelpers.selectorToPath(soupModel.select(selector)); | |
boolean first = true; | |
for (TreePath tp : paths) { | |
System.err.println("--Path"); | |
for (Object o : tp.getPath()) { | |
SoupTreeNode tn = (SoupTreeNode)o; | |
Node n = tn.node(); | |
StringBuilder sb = new StringBuilder(); | |
if (n.hasAttr("id")) { | |
sb.append(' ').append("#").append(n.attr("id")); | |
} else if (n.hasAttr("itemtype")) { | |
sb.append("[itemtype=\"").append(n.attr("itemtype")).append(']'); | |
} else if (n.hasAttr("itemprop")) { | |
sb.append("[itemprop=\"").append(n.attr("itemprop")).append(']'); | |
} else if (n.hasAttr("class")) { | |
String cssClass = n.attr("class"); | |
final String[] parts = cssClass.split("\\p{Blank}"); | |
if (parts.length > 0) { | |
sb.append(' ').append(".").append(parts[0]); | |
} | |
} else { | |
int count = 1; | |
Node sib = n.previousSibling(); | |
while (null != sib) { | |
sib = sib.previousSibling(); | |
if (null != sib && sib.nodeName().equals(n.nodeName())) { | |
count++; | |
} | |
} | |
sb.append('[').append(count).append(']'); | |
} | |
if (!first) { | |
System.err.printf("/"); | |
} | |
System.err.printf("<%s%s>", n.nodeName(), sb); | |
first = false; | |
} | |
System.err.printf("%n--Path--%n"); | |
} | |
for (TreePath tp : paths) { | |
tree.makeVisible(tp); | |
} | |
// tree.addSelectionPaths(paths); | |
} | |
public void runFile(File f) { | |
String charset = null; | |
//noinspection ConstantConditions | |
runFile(f, charset); | |
} | |
public void runFile(File f, String charset) { | |
final Container contentPane = getContentPane(); | |
try { | |
runFileIOE(f, charset); | |
} catch (IOException ex) { | |
LOG.log(Level.SEVERE, "Kaboom", ex); | |
contentPane.removeAll(); | |
contentPane.add(new JLabel("Kaboom: "+ex)); | |
} | |
} | |
public void runFileIOE(File f, String charset) throws IOException { | |
final Container contentPane = getContentPane(); | |
final Document doc = Jsoup.parse(f, charset); | |
doc.outputSettings( | |
doc.outputSettings() | |
.prettyPrint(true) | |
.indentAmount(2) | |
.syntax(Document.OutputSettings.Syntax.xml)); | |
contentPane.removeAll(); | |
JTabbedPane jtp = new JTabbedPane(); | |
final SoupTreeModel soupModel = new SoupTreeModel(doc); | |
tree.setModel(soupModel); | |
tree.setExpandsSelectedPaths(true); | |
jtp.add("tree", | |
new JScrollPane(tree | |
, ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS | |
, ScrollPaneConstants.HORIZONTAL_SCROLLBAR_ALWAYS)); | |
final String html = doc.toString(); | |
final JTextArea jta = new JTextArea(html); | |
jta.setFont(Font.decode("Courier New-PLAIN-9")); | |
jta.setEditable(false); | |
jta.setLineWrap(true); | |
jtp.add("source", jta); | |
contentPane.add(jtp); | |
contentPane.setVisible(false); | |
contentPane.setVisible(true); | |
final String selector = ".meet-business-owner"; | |
runSelection(selector); | |
} | |
public static class SoupTreeModel extends DefaultTreeModel | |
{ | |
public SoupTreeModel(Node doc) { | |
super(SoupTreeNode.forNode(doc)); | |
} | |
private Node objectToNode(Object o) { | |
if (o instanceof Node) { | |
return (Node)o; | |
} else if (o instanceof SoupTreeModel) { | |
return objectToNode(((SoupTreeModel) o).root); | |
} else if (o instanceof SoupTreeNode) { | |
return (Node)((SoupTreeNode)o).getUserObject(); | |
} else { | |
throw new IllegalArgumentException( | |
format("Quoi?<%s>", null == o ? "NULL" : o.getClass().getName())); | |
} | |
} | |
public SoupTreeNode[] select(String cssQuery) { | |
final Elements elements = ((Document) objectToNode(this.root)).select(cssQuery); | |
SoupTreeNode[] results = new SoupTreeNode[elements.size()]; | |
int i = 0; | |
for (Node el : elements) { | |
results[i++] = SoupTreeNode.forNode(el); | |
} | |
return results; | |
} | |
@Override | |
public boolean isLeaf(Object node) { | |
Node n = objectToNode(node); | |
return TreeHelpers.isLeaf(n); | |
} | |
@Override | |
public int getChildCount(Object parent) { | |
Node n = objectToNode(parent); | |
return TreeHelpers.getChildCount(n); | |
} | |
@Override | |
public Object getChild(Object parent, int index) { | |
Node n = objectToNode(parent); | |
return new SoupTreeModel(TreeHelpers.getChild(n, index)); | |
} | |
@Override | |
public int getIndexOfChild(Object parent, Object child) { | |
Node n = objectToNode(parent); | |
Node ch = objectToNode(child); | |
return TreeHelpers.getIndexOfChild(n, ch); | |
} | |
@Override | |
public String toString() { | |
final Node self = objectToNode(root); | |
return self.nodeName() + "[" + self.attributes() + "]"; | |
} | |
} | |
public static class SoupTreeNode extends DefaultMutableTreeNode | |
{ | |
private static Map<Node, SoupTreeNode> cache = new WeakHashMap<Node, SoupTreeNode>(); | |
public static SoupTreeNode forNode(Node n) { | |
SoupTreeNode result; | |
synchronized (SoupTreeNode.class) { | |
if (cache.containsKey(n)) { | |
result = cache.get(n); | |
} else { | |
result = new SoupTreeNode(n); | |
cache.put(n, result); | |
} | |
} | |
return result; | |
} | |
private SoupTreeNode(Node userObject) { | |
super(userObject); | |
} | |
private Node node() { | |
return (Node)this.userObject; | |
} | |
@Override | |
public TreeNode getParent() { | |
Node parent = node().parent(); | |
if (null == parent) { | |
return null; | |
} | |
return forNode(parent); | |
} | |
@Override | |
public boolean isLeaf() { | |
return TreeHelpers.isLeaf(node()); | |
} | |
@Override | |
public int getChildCount() { | |
return TreeHelpers.getChildCount(node()); | |
} | |
@Override | |
public TreeNode getChildAt(int index) { | |
return forNode(TreeHelpers.getChild(node(), index)); | |
} | |
@Override | |
public int getIndex(TreeNode aChild) { | |
return TreeHelpers.getIndexOfChild(node(), ((SoupTreeNode) aChild).node()); | |
} | |
@Override | |
public String toString() { | |
return node().nodeName(); | |
} | |
} | |
public static class TreeHelpers { | |
public static TreePath[] selectorToPath(SoupTreeNode[] leaves) { | |
if (null == leaves) { | |
return new TreePath[0]; | |
} | |
final TreePath[] results = new TreePath[leaves.length]; | |
for (int i = 0; i < leaves.length; i++) { | |
results[i] = new TreePath(leaves[i].getPath()); | |
} | |
return results; | |
} | |
public static boolean isLeaf(Node node) { | |
return 0 == node.childNodeSize(); | |
} | |
public static int getChildCount(Node parent) { | |
return parent.childNodeSize(); | |
} | |
public static Node getChild(Node parent, int index) { | |
return parent.childNode(index); | |
} | |
public static int getIndexOfChild(Node parent, Object child) { | |
for (int i = 0, len = parent.childNodeSize(); i < len; i++) { | |
final Node n = parent.childNode(i); | |
if (n.equals(child)) { | |
return i; | |
} | |
} | |
return -1; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment