Skip to content

Instantly share code, notes, and snippets.

@potmat
Last active January 19, 2018 12:29
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save potmat/f0fcfe9195631978dc33 to your computer and use it in GitHub Desktop.
Save potmat/f0fcfe9195631978dc33 to your computer and use it in GitHub Desktop.
Extract Merged Cells and Formatted Values for Google Spreadsheets in Apps Script
function columnToLetter(column)
{
var temp, letter = '';
while (column > 0)
{
temp = (column - 1) % 26;
letter = String.fromCharCode(temp + 65) + letter;
column = (column - temp - 1) / 26;
}
return letter;
}
function createBooleanArray(length) {
if (arguments.length === 0) { return false }
var arr = new Array(length),
i = length;
var args = Array.prototype.slice.call(arguments, 1);
while(i--) arr[length-1 - i] = createBooleanArray.apply(this, args);
return arr;
}
var _ = underscorejs.load();
var docId = '<insert doc id here>';
//Download the spreadsheet as HTML
var zipUrl = Utilities.formatString('https://docs.google.com/spreadsheets/d/%s/export?format=zip', docId);
var zipUrlParams = {
method : "get",
headers : {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
muteHttpExceptions:true,
};
//Build a hash of cell index (e.g. 'Sheet1:A3') to rowspan and colspan. If a cell is part of a merged range and
//not the top left cell in that range it will not appear in the hash.
var parsedHtml = _.chain(Utilities.unzip(UrlFetchApp.fetch(zipUrl,zipUrlParams).getBlob()))
.filter(function (x) { return _.contains(sheetNames, x.getName().slice(0, -5)) })
.map(function (x) {
var sheetName = x.getName().slice(0, -5);
var sheet = document.getSheetByName(sheetName);
var dataRange = sheet.getDataRange();
var visited = createBooleanArray(dataRange.getNumRows(), dataRange.getNumColumns());
var xmlString = x.getDataAsString().match(/<tbody>(.*?)<\/tbody>/g);
if (!xmlString || xmlString.length === 0) { throw ('Could not find tbody element') }
//Clean up unclosed tags.
xmlString = xmlString[0].replace(/<br>/g, '');
return _.map(
XmlService.parse(xmlString).getRootElement().getChildren('tr'),
function (tableRow, tableRowIndex) {
var rowIndices = _.chain(this.visited[tableRowIndex])
.map(function (value, index) { return {index: index, value: value }})
.filter(function (x) { return !x.value })
.pluck('index')
.value();
return _.map(
tableRow.getChildren('td'),
function (tableCell, tableColumnIndex) {
if (tableColumnIndex + this.increaseBy >= rowIndices.length) { return null }
var rowspan = tableCell.getAttribute('rowspan') ? parseInt(tableCell.getAttribute('rowspan').getValue()) : 1;
var colspan = tableCell.getAttribute('colspan') ? parseInt(tableCell.getAttribute('colspan').getValue()) : 1;
var columnLetter = columnToLetter(rowIndices[tableColumnIndex + this.increaseBy] + 1);
for (var i = 0; i < rowspan; i++) {
if (tableRowIndex + i > this.visited.length) { break; }
for (var j = 0; j < colspan; j++) {
if (tableColumnIndex + this.increaseBy >= rowIndices.length
|| rowIndices[tableColumnIndex + this.increaseBy]+j >= this.visited[0].length) {
break;
}
this.visited[tableRowIndex+i][rowIndices[tableColumnIndex + this.increaseBy]+j] = true;
}
}
if (colspan > 1) { this.increaseBy += colspan-1 }
return {
index: Utilities.formatString('%s!%s%s', sheetName, columnLetter, (tableRowIndex+1)),
text: _.chain(tableCell.getDescendants())
.filter(function (x) { return x.getType() == XmlService.ContentTypes.TEXT })
.map(function (x) { return x.asText().getText() })
.value().join(' ').trim(),
rowspan: rowspan,
colspan: colspan
};
},
{increaseBy: 0, visited: this.visited})
},
{visited: visited})
})
.flatten()
.filter(function (x) { return x })
.indexBy(function (x) { return x.index })
.value();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment