Skip to content

Instantly share code, notes, and snippets.

@planbnet
Created March 16, 2010 07:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save planbnet/333736 to your computer and use it in GitHub Desktop.
Save planbnet/333736 to your computer and use it in GitHub Desktop.
// Bookmarklet to calculate the most generic xpath for the current selection
// (helper utility for scraping websites)
// Entry point of the bookmarklet: computes a short XPath for the element that
// contains the current text selection, briefly outlines that element in red and
// shows the XPath in a prompt() so it can be copied. If there is no selection
// (or no path can be built) an alert() is shown instead.
function selectionxpath() {
  // Quotes `value` as an XPath 1.0 string literal. XPath 1.0 has no escape
  // character, so a value containing both quote kinds must be assembled with
  // concat(). Values without a single quote keep the '...' form.
  function xpathLiteral( value ) {
    var text = String(value);
    if (text.indexOf("'") === -1) return "'" + text + "'";
    if (text.indexOf('"') === -1) return '"' + text + '"';
    return "concat('" + text.split("'").join("',\"'\",'") + "')";
  }

  // Builds the XPath for the element `sel` in up to four stages, each one
  // anchoring the path closer to `sel`:
  //   1. the nearest ancestor-or-self that has an id,
  //   2. below that, the nearest ancestor-or-self whose tag+class expression
  //      selects it as the FIRST match,
  //   3. below that, the nearest ancestor-or-self whose tag expression selects
  //      it as the FIRST match,
  //   4. a child-by-child path (with positional indexes) for whatever remains.
  // Note that stages 2 and 3 only check "first match in document order", not
  // strict uniqueness; the returned expression may match further nodes.
  // Returns the XPath as a string.
  function calculateShortestXpathOfElement( sel ) {
    var xpath = "";
    // Every stage searches from `sel` upwards and ends before this node,
    // i.e. before the anchor the previous stage already emitted.
    var stop = null;

    // First node selected by `expr`, or null.
    function firstMatch( expr ) {
      return document.evaluate(expr, sel.ownerDocument, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
    }

    // Nearest node on the way from `sel` up to (excluding) `stop` that
    // satisfies `matches`, or null.
    function closest( matches ) {
      for (var n = sel; n && n !== stop; n = n.parentNode) {
        if (matches(n)) return n;
      }
      return null;
    }

    // The probes below use exactly the step that is later emitted, so the
    // expression that was tested is the expression that is returned.
    function tagStep( n ) {
      return "//" + n.tagName.toLowerCase();
    }
    function classStep( n ) {
      return tagStep(n) + "[@class=" + xpathLiteral(n.getAttribute("class")) + "]";
    }

    // Stage 1: nearest id. The typeof check ignores non-string `id`
    // properties (e.g. a form whose control is named "id").
    var anchor = closest(function (n) {
      return typeof n.id === "string" && n.id !== "";
    });
    if (anchor) {
      xpath = tagStep(anchor) + "[@id=" + xpathLiteral(anchor.id) + "]";
      if (anchor === sel) return xpath;
      stop = anchor;
    }

    // Stage 2: nearest tag+class that is the first match below the anchor.
    anchor = closest(function (n) {
      return n.nodeType === 1 &&
        n.getAttribute("class") != null &&
        firstMatch(xpath + classStep(n)) === n;
    });
    if (anchor) {
      xpath += classStep(anchor);
      if (anchor === sel) return xpath;
      stop = anchor;
    }

    // Stage 3: nearest tag that is the first match below the anchor.
    anchor = closest(function (n) {
      return n.nodeType === 1 && firstMatch(xpath + tagStep(n)) === n;
    });
    if (anchor) {
      xpath += tagStep(anchor);
      if (anchor === sel) return xpath;
      stop = anchor;
    }

    // Stage 4: explicit child steps from the last anchor down to `sel`.
    var restPath = "";
    for (var node = sel; node && node.nodeType === 1 && node !== stop; node = node.parentNode) {
      // XPath positions are 1-based and count same-named element siblings only.
      var idx = 1;
      for (var sib = node.previousSibling; sib; sib = sib.previousSibling) {
        if (sib.nodeType === 1 && sib.tagName === node.tagName) idx++;
      }
      var xname = node.tagName.toLowerCase();
      if (idx > 1) xname += "[" + idx + "]";
      restPath = "/" + xname + restPath;
    }
    return xpath + restPath;
  }

  // Returns the XPath for the element enclosing the first range of the current
  // selection, or null when nothing is selected.
  function calculateShortestXpathOfSelection() {
    var selection = window.getSelection();
    // getRangeAt(0) throws when the selection holds no range, so test first.
    if (!selection || selection.rangeCount === 0) return null;

    // Deepest node that contains both ends of the range.
    var container = selection.getRangeAt(0).commonAncestorContainer;
    // Range boundaries usually sit in text nodes; the path builder only
    // understands elements, so climb to the enclosing element.
    while (container && container.nodeType !== 1) {
      container = container.parentNode;
    }
    if (!container) return null;
    return calculateShortestXpathOfElement(container);
  }

  var xpath = calculateShortestXpathOfSelection();
  if (!xpath) {
    alert("Could not determine generic xpath for selection");
    return;
  }

  // Re-evaluate the result so the outline shows what the XPath actually
  // selects, which is not necessarily the node the path was derived from.
  var node = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
  var canHighlight = !!(node && node.style);
  var border = "";
  if (canHighlight) {
    border = node.style.border || "";
    node.style.border = "2px dashed red";
  }
  try {
    prompt("Most generic xpath for selection:", xpath);
  } finally {
    // Always put the page back the way it was.
    if (canHighlight) node.style.border = border;
  }
}
// Run immediately: the script is a bookmarklet, so evaluating it is the only trigger.
selectionxpath();
<html><body><p><a href="javascript:function%20selectionxpath()%20{function%20calculateShortestXpathOfElement(%20sel%20)%20{var%20node%20=%20sel;var%20nextId%20=%20null;var%20stop%20=%20null;var%20xpath%20=%20%22%22;while%20(true)%20{if%20(node.id%20&&%20node.id%20!=%20%22%22)%20{nextId%20=%20node.id;break;}node%20=%20node.parentNode;if%20(node%20==%20stop)%20break;}if%20(nextId%20!=%20null)%20{xpath%20=%20%22//%22%20+%20node.tagName.toLowerCase()%20+%20%22[@id=%27%22%20+%20nextId%20+%20%22%27]%22;if%20(%20node%20==%20sel%20)%20{return%20xpath;}%20else%20{stop%20=%20node;}}node%20=%20sel;var%20nextUniqueClass%20=%20null;while%20(true)%20{if%20(node.nodeType%20===%201)%20{var%20styleClass%20=%20node.getAttribute(%22class%22);if%20(styleClass%20!=%20null)%20{var%20tmpXpath%20=%20xpath+%22//%22+node.tagName+%22[@class=%27%22+styleClass+%22%27]%22;var%20tempResult%20=%20document.evaluate(tmpXpath,%20sel.ownerDocument,%20null,%20XPathResult.FIRST_ORDERED_NODE_TYPE,%20null);if%20(tempResult.singleNodeValue%20==%20node)%20{nextUniqueClass%20=%20styleClass;break;}}}node%20=%20node.parentNode;if%20(node%20==%20stop)%20break;}if%20(nextUniqueClass%20!=%20null)%20{xpath%20+=%20%22//%22+node.tagName.toLowerCase()+%22[@class=%27%22+nextUniqueClass+%22%27]%22;if%20(%20node%20==%20sel%20)%20{return%20xpath;}%20else%20{stop%20=%20node;}}node%20=%20sel;var%20nextUniqueTag%20=%20null;while%20(true)%20{if%20(node.nodeType%20===%201)%20{var%20tmpXpath%20=%20xpath+%22//%22+node.tagName;var%20tempResult%20=%20document.evaluate(tmpXpath,%20sel.ownerDocument,%20null,%20XPathResult.FIRST_ORDERED_NODE_TYPE,%20null);if%20(tempResult.singleNodeValue%20==%20node)%20{nextUniqueTag%20=%20node.tagName;break;}}node%20=%20node.parentNode;if%20(node%20==%20stop)%20break;}if%20(nextUniqueTag%20!=%20null)%20{xpath%20+=%20%22//%22+node.tagName.toLowerCase();if%20(%20node%20==%20sel%20)%20{return%20xpath;}%20else%20{stop%20=%20node;}}var%20restPath%20=%20%22%22;for%20(node%20=%20sel;%20node%20&&%20node.no
deType%20==%201;%20node%20=%20node.parentNode)%20{if%20(node%20==%20stop)%20break;var%20idx%20=%201;for%20(var%20sib%20=%20node.previousSibling;%20sib%20;%20sib%20=%20sib.previousSibling)%20{if(sib.nodeType%20==%201%20&&%20sib.tagName%20==%20node.tagName)%20idx++;}var%20xname%20=%20node.tagName.toLowerCase();if%20(idx%20>%201)%20xname%20+=%20%22[%22%20+%20idx%20+%20%22]%22;restPath%20=%20%22/%22%20+%20xname%20+%20restPath;}var%20result%20=%20xpath%20+%20restPath;return%20result;}function%20depthOf(%20el%20)%20{i%20=%200;while%20(el)%20{el%20=%20el.parentNode;i++;}return%20i;}function%20calculateShortestXpathOfSelection()%20{var%20sel%20=%20window.getSelection().getRangeAt(0);if%20(!sel)%20return%20null;var%20start%20=%20sel.startContainer;var%20end%20=%20sel.endContainer;var%20i%20=%20depthOf(%20start%20);var%20j%20=%20depthOf(%20end%20);while%20(start%20!=%20end%20&&%20i%20!=%200%20&&%20j%20!=%200)%20{if%20(i%20>%20j)%20{start%20=%20start.parentNode;i--;}%20else%20{end%20=%20end.parentNode;j--;}}return%20calculateShortestXpathOfElement(start);}var%20xpath%20=%20calculateShortestXpathOfSelection();var%20node%20=%20document.evaluate(xpath,%20document,%20null,%20XPathResult.FIRST_ORDERED_NODE_TYPE,%20null).singleNodeValue;var%20border%20=%20node.style.border;if%20(!border)%20border%20=%20%22%22;node.style.border%20=%20%222px%20dashed%20red%22;if%20(xpath)%20{prompt(%22Most%20generic%20xpath%20for%20selection:%22,%20xpath);node.style.border%20=%20border;}%20else%20{alert(%22Could%20not%20determine%20generic%20xpath%20for%20selection%22);}}selectionxpath();">Selection XPath</a></p><p/><p>Drag the link to your bookmark bar</p></body></html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment