Last active
January 19, 2018 12:29
-
-
Save potmat/f0fcfe9195631978dc33 to your computer and use it in GitHub Desktop.
Extract Merged Cells and Formatted Values for Google Spreadsheets in Apps Script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function columnToLetter(column) | |
{ | |
var temp, letter = ''; | |
while (column > 0) | |
{ | |
temp = (column - 1) % 26; | |
letter = String.fromCharCode(temp + 65) + letter; | |
column = (column - temp - 1) / 26; | |
} | |
return letter; | |
} | |
function createBooleanArray(length) { | |
if (arguments.length === 0) { return false } | |
var arr = new Array(length), | |
i = length; | |
var args = Array.prototype.slice.call(arguments, 1); | |
while(i--) arr[length-1 - i] = createBooleanArray.apply(this, args); | |
return arr; | |
} | |
var _ = underscorejs.load(); | |
var docId = '<insert doc id here>'; | |
//Download the spreadsheet as HTML | |
var zipUrl = Utilities.formatString('https://docs.google.com/spreadsheets/d/%s/export?format=zip', docId); | |
var zipUrlParams = { | |
method : "get", | |
headers : {"Authorization": "Bearer " + ScriptApp.getOAuthToken()}, | |
muteHttpExceptions:true, | |
}; | |
//Build a hash of cell index (e.g. 'Sheet1:A3') to rowspan and colspan. If a cell is part of a merged range and | |
//not the top left cell in that range it will not appear in the hash. | |
var parsedHtml = _.chain(Utilities.unzip(UrlFetchApp.fetch(zipUrl,zipUrlParams).getBlob())) | |
.filter(function (x) { return _.contains(sheetNames, x.getName().slice(0, -5)) }) | |
.map(function (x) { | |
var sheetName = x.getName().slice(0, -5); | |
var sheet = document.getSheetByName(sheetName); | |
var dataRange = sheet.getDataRange(); | |
var visited = createBooleanArray(dataRange.getNumRows(), dataRange.getNumColumns()); | |
var xmlString = x.getDataAsString().match(/<tbody>(.*?)<\/tbody>/g); | |
if (!xmlString || xmlString.length === 0) { throw ('Could not find tbody element') } | |
//Clean up unclosed tags. | |
xmlString = xmlString[0].replace(/<br>/g, ''); | |
return _.map( | |
XmlService.parse(xmlString).getRootElement().getChildren('tr'), | |
function (tableRow, tableRowIndex) { | |
var rowIndices = _.chain(this.visited[tableRowIndex]) | |
.map(function (value, index) { return {index: index, value: value }}) | |
.filter(function (x) { return !x.value }) | |
.pluck('index') | |
.value(); | |
return _.map( | |
tableRow.getChildren('td'), | |
function (tableCell, tableColumnIndex) { | |
if (tableColumnIndex + this.increaseBy >= rowIndices.length) { return null } | |
var rowspan = tableCell.getAttribute('rowspan') ? parseInt(tableCell.getAttribute('rowspan').getValue()) : 1; | |
var colspan = tableCell.getAttribute('colspan') ? parseInt(tableCell.getAttribute('colspan').getValue()) : 1; | |
var columnLetter = columnToLetter(rowIndices[tableColumnIndex + this.increaseBy] + 1); | |
for (var i = 0; i < rowspan; i++) { | |
if (tableRowIndex + i > this.visited.length) { break; } | |
for (var j = 0; j < colspan; j++) { | |
if (tableColumnIndex + this.increaseBy >= rowIndices.length | |
|| rowIndices[tableColumnIndex + this.increaseBy]+j >= this.visited[0].length) { | |
break; | |
} | |
this.visited[tableRowIndex+i][rowIndices[tableColumnIndex + this.increaseBy]+j] = true; | |
} | |
} | |
if (colspan > 1) { this.increaseBy += colspan-1 } | |
return { | |
index: Utilities.formatString('%s!%s%s', sheetName, columnLetter, (tableRowIndex+1)), | |
text: _.chain(tableCell.getDescendants()) | |
.filter(function (x) { return x.getType() == XmlService.ContentTypes.TEXT }) | |
.map(function (x) { return x.asText().getText() }) | |
.value().join(' ').trim(), | |
rowspan: rowspan, | |
colspan: colspan | |
}; | |
}, | |
{increaseBy: 0, visited: this.visited}) | |
}, | |
{visited: visited}) | |
}) | |
.flatten() | |
.filter(function (x) { return x }) | |
.indexBy(function (x) { return x.index }) | |
.value(); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment