Last active
October 19, 2020 03:39
-
-
Save KenDUemura/983766f3f276b984f40e10a311eb72dc to your computer and use it in GitHub Desktop.
Google Apps Script that parses a table element from an HTML string (read with XmlService) and returns its contents as a 2D array
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Extracts the cell text of an HTML table located by a simple XPath-like path.
 *
 * The markup is parsed with XmlService and every element is looked up in the
 * XHTML namespace 'http://www.w3.org/1999/xhtml'.
 *
 * Limitations: only <td> cells of <tr> rows found directly under the table
 * (or under its <tbody>, when present) are read. th/thead/tfoot are not
 * supported yet.
 *
 * @param {string} text XML source (namespace needs to be
 *     'http://www.w3.org/1999/xhtml').
 * @param {string} path Path of tag names with optional 1-based indexes, e.g.
 *     /html/body/table[3]. So far tested with simple indexed lookups only.
 * @returns {Array[]} Table data, one inner array of cell values per <tr>.
 *     Empty when the last path segment is not a table.
 * @throws {Error} When a path segment cannot be parsed, or when the path does
 *     not lead to an existing element.
 */
function getDataFromXpath (text, path) {
  var xmlDoc = XmlService.parse(text);
  Logger.log("INFO: xmlDoc \n" + xmlDoc);

  // <html> is the root element itself, so drop empty segments (produced by the
  // leading "/") and a leading "html" step before walking down the tree.
  var tags = path.split("/").filter(function (segment) {
    return segment !== "";
  });
  if (tags.length > 0 && tags[0] === "html") {
    tags.shift();
  }
  Logger.log("tags : " + tags);

  var namespace = XmlService.getNamespace('http://www.w3.org/1999/xhtml');
  var element = xmlDoc.getRootElement();
  var lastName = "";

  for (var i = 0; i < tags.length; i++) {
    var tag = tags[i];
    Logger.log("Tag : " + tag);
    if (!element) {
      throw new Error("No element found before path segment '" + tag + "' in path: " + path);
    }
    // A segment is "name" or "name[N]" where N is a 1-based position.
    var step = /^([^\[\]]+)(?:\[(\d+)\])?$/.exec(tag);
    if (!step) {
      throw new Error("Unsupported path segment: " + tag);
    }
    lastName = step[1];
    if (step[2] !== undefined) {
      var position = parseInt(step[2], 10);
      element = element.getChildren(lastName, namespace)[position - 1];
    } else {
      element = element.getChild(lastName, namespace);
    }
    Logger.log(element);
  }

  var data = [];
  // Compare the bare tag name so that only real <table> targets are parsed.
  if (lastName !== "table") {
    Logger.log("Unsupported tag type: " + (tags.length > 0 ? tags[tags.length - 1] : ""));
    return data;
  }
  if (!element) {
    throw new Error("No element matches path: " + path);
  }

  Logger.log("Parsing Table");
  // TODO: thead
  // Rows may sit directly under <table> or be wrapped in a <tbody>.
  var tbody = element.getChild("tbody", namespace);
  var container = tbody ? tbody : element;
  var rows = container.getChildren("tr", namespace);
  for (var r = 0; r < rows.length; r++) {
    var cols = rows[r].getChildren("td", namespace);
    var row = [];
    for (var c = 0; c < cols.length; c++) {
      row.push(cols[c].getValue());
    }
    // Collect the finished row; without this the result would always be empty.
    data.push(row);
  }
  // TODO: tfoot
  return data;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment