-
-
Save vs4vijay/6724868 to your computer and use it in GitHub Desktop.
/**
 * Fetches the page at `url` and returns the text of the element addressed by
 * a simple absolute XPath (for example one copied from a browser's inspector).
 *
 * Supported steps are plain child steps: `tag` (first child with that name)
 * and `tag[n]` (n-th child with that name, 1-based, any number of digits).
 * Attribute predicates such as `div[@class='x']` are not supported.
 *
 * @param {string} path XPath such as "/html/body/table/tbody/tr[12]/td[2]".
 * @param {string} url Address of the HTML page to fetch.
 * @return {string} Text content of the addressed element.
 * @throws {Error} If a step uses an unsupported predicate or matches no element.
 */
function getDataFromXpath(path, url) {
  var data = UrlFetchApp.fetch(url);
  var text = data.getContentText();
  // NOTE(review): `Xml` is the legacy Apps Script XML service; confirm it is
  // still available in the target runtime before relying on this function.
  var xmlDoc = Xml.parse(text, true);

  // The document's root element already is <html>, so drop that leading step.
  var rawTags = path.replace(/^\/?html(?:\/|$)/, "").split("/");

  // Drop empty segments (leading/trailing slash) and EVERY tbody step: the
  // original code removed tbody because the parsed document does not expose
  // it, but its string replace only removed the first occurrence.
  var tags = [];
  for (var j = 0; j < rawTags.length; j++) {
    var rawTag = rawTags[j];
    if (rawTag === "" || /^tbody(?:\[\d+\])?$/.test(rawTag)) {
      continue;
    }
    tags.push(rawTag);
  }
  Logger.log("tags : " + tags);

  // Walk down from the root element of the parsed HTML, one step at a time.
  var element = xmlDoc.getElement();
  for (var i = 0; i < tags.length; i++) {
    var tag = tags[i];
    Logger.log("Tag : " + tag);

    var next;
    if (tag.indexOf("[") !== -1) {
      // Parse the whole bracket content so indexes above 9 work.
      var match = /^([^\[\]]+)\[(\d+)\]$/.exec(tag);
      if (!match) {
        throw new Error('Unsupported XPath step "' + tag + '": only tag and tag[n] are supported');
      }
      // XPath positions are 1-based; the returned array is 0-based.
      next = element.getElements(match[1])[parseInt(match[2], 10) - 1];
    } else {
      next = element.getElement(tag);
    }

    if (next == null) {
      throw new Error('No element matches XPath step "' + tag + '" (step ' + (i + 1) + ' of "' + tags.join("/") + '")');
    }
    element = next;
  }
  return element.getText();
}
// changed from original code to accept index >9
Hi ,
By changing :
line 16: var val = parseInt(tag[index+1]);
by :
var val = parseInt(tag.substring(index+1, indexend));
It accepts indexes bigger than 9, which is useful for big tables.
Cheers
fred
I've been using your code and overall it works pretty fine, but I think I found a bug.
Not working for /html/body/div[5]/div[4]/div[2]/div[2]/div/div/div[1]/div[1] in Link
TypeError: Cannot call method "getElements" of undefined. (line 21).
It should show just the title of the challenge. I just copy-pasted the xpath using firefox.
Greetings.
Luis.
I've improved this a little bit by allowing regex matching for these indexes.
- It starts by removing any `html` from the XPath, since that will basically be the first element we send.
- This works when going from a root element (html in this case) to any element inside the DOM. It does not work with going back one level. If I happen to need it in my project, I might update this with that logic as well.
- Since you can provide any element to it, you can pass in an XPath relative to that element to the child/ren you want
Hope this helps.
/**
 * Resolves a simple XPath relative to `element` and returns the matching
 * descendant.
 *
 * Supported steps are plain child steps: `tag` (first child with that name,
 * via `getChild`) and `tag[n]` (n-th child with that name, 1-based, via
 * `getChildren`). A leading `html` step is dropped because `element` is
 * expected to be that root. Going up a level (`..`) and attribute predicates
 * such as `div[@class='x']` are not supported.
 *
 * @param {Object} element Starting element; must provide `getChild(name)` and
 *     `getChildren(name)`.
 * @param {string} xPath Path such as "/html/body/div[2]/form".
 * @return {Object} The matching element, or null when the path cannot be
 *     resolved (missing element, out-of-range index, unsupported step).
 */
function getElementByXPath(element, xPath) {
  if (element == null) {
    return null;
  }

  // Only strip a LEADING html step; stripping "html/" anywhere in the path
  // would corrupt tag names that merely end in "html".
  var tokens = xPath.replace(/^\/?html(?:\/|$)/i, '').split('/');
  var current = element;

  for (var i = 0; i < tokens.length; i++) {
    var token = tokens[i];
    if (token === '') {
      continue; // produced by a leading, trailing or doubled slash
    }

    var next;
    if (token.indexOf('[') !== -1) {
      // Extract the tag name and the 1-based position from "tag[n]".
      var match = /^([^\[\]]+)\[(\d+)\]$/.exec(token);
      if (!match) {
        // Unsupported predicate: report "not found" instead of silently
        // returning whichever ancestor had been reached so far.
        return null;
      }
      next = current.getChildren(match[1])[parseInt(match[2], 10) - 1];
    } else {
      next = current.getChild(token);
    }

    if (next == null) {
      return null;
    }
    current = next;
  }
  return current;
}
It's showing XML is not defined. Can someone help me with this immediately?
@mananchawla8 I think `Xml` has changed to `XmlService`, but the script doesn't work anyway, by the way.
https://developers.google.com/apps-script/reference/xml-service
Yeah, this code snippet does not work any more, which is not surprising given that it was published 9 years ago.
Doesn't work for /html/body/div[@Class='something']/form
Only works for /html/body/div[2]/form