Skip to content

Instantly share code, notes, and snippets.

@achabotl
Last active March 8, 2023 18:07
Show Gist options
  • Save achabotl/c914cd6817748e794852fd5d6a1bf1ad to your computer and use it in GitHub Desktop.
Save achabotl/c914cd6817748e794852fd5d6a1bf1ad to your computer and use it in GitHub Desktop.
Validate External Links in Google Docs
/**
 * Simple onOpen trigger: registers the add-on menu entry in the Google Docs
 * UI when the document is opened.
 *
 * The single menu item, "Highlight Broken Links", is wired to the
 * `validateUrlsInDoc` function by name.
 *
 * @param {object} e The event parameter for a simple onOpen trigger. It is
 *     not read here; inspect e.authMode to find out which authorization mode
 *     (ScriptApp.AuthMode) the trigger is running in.
 */
function onOpen(e) {
  const ui = DocumentApp.getUi();
  const addonMenu = ui.createAddonMenu();
  // addItem returns the menu it was called on, so keep using its result.
  const menuWithItem = addonMenu.addItem('Highlight Broken Links', 'validateUrlsInDoc');
  menuWithItem.addToUi();
}
/**
 * Checks every external link in the active document and highlights the
 * broken ones in yellow.
 *
 * A link counts as broken when fetching it throws (DNS failure, timeout,
 * malformed URL, ...) or when the server answers with an HTTP status of 400
 * or above. Each broken link is also reported through console.log.
 * Links for which isExternalUrl() returns false are skipped.
 */
function validateUrlsInDoc() {
  const body = DocumentApp.getActiveDocument().getBody();
  const links = getAllTextLinks(body).concat(getAllRichLinks(body));

  // The same URL often appears several times in one document; remember the
  // outcome so each distinct URL is fetched only once.
  const problemByUrl = new Map();
  const brokenLinks = [];

  for (const link of links) {
    const url = link.url;
    if (!isExternalUrl(url)) {
      continue;
    }
    if (!problemByUrl.has(url)) {
      problemByUrl.set(url, describeLinkProblem_(url));
    }
    const problem = problemByUrl.get(url);
    if (problem !== null) {
      brokenLinks.push(link);
      console.log(`Invalid URL: ${url} (${problem})`);
    }
  }

  for (const brokenLink of brokenLinks) {
    try {
      highlightLink_(brokenLink);
    } catch (err) {
      // One element that cannot be highlighted must not prevent the
      // remaining broken links from being marked.
      console.log(`Could not highlight link to ${brokenLink.url}: ${err.message}`);
    }
  }
}

/**
 * Fetches a URL and describes what is wrong with it.
 *
 * @param {string} url The URL to check.
 * @returns {?string} A short description of the problem, or null when the
 *     URL answered with a status below 400.
 */
function describeLinkProblem_(url) {
  let response;
  try {
    response = UrlFetchApp.fetch(url, { muteHttpExceptions: true });
  } catch (err) {
    // No response object exists in this case, so report the failure directly
    // instead of trying to read a status code.
    return `fetch failed: ${err.message}`;
  }
  const responseCode = response.getResponseCode();
  return responseCode >= 400 ? `response code: ${responseCode}` : null;
}

/**
 * Applies a yellow background to one link record.
 *
 * @param {{element: object, startOffset: number, endOffsetInclusive: number}} link
 *     A record as produced by getAllTextLinks / getAllRichLinks.
 */
function highlightLink_(link) {
  const highlightColor = '#ffff00';
  const start = link.startOffset;
  const end = link.endOffsetInclusive;
  const hasCharRange =
    Number.isInteger(start) && Number.isInteger(end) && start >= 0 && end >= start;

  if (hasCharRange) {
    // Colour only the linked characters rather than the whole text element.
    link.element.setBackgroundColor(start, end, highlightColor);
  } else {
    // No usable character range (e.g. negative offsets): colour the element.
    // NOTE(review): not every element type may expose setBackgroundColor;
    // the caller logs and continues if this throws.
    link.element.setBackgroundColor(highlightColor);
  }
}
/**
 * Tells whether a link target is an http(s) URL.
 *
 * The scheme is matched case-insensitively, so "HTTPS://example.com" counts
 * as external. Anything that is not a string (null, undefined, ...) is
 * reported as not external instead of raising an error.
 *
 * @param {*} url The link target to classify.
 * @returns {boolean} True when url starts with "http://" or "https://".
 */
function isExternalUrl(url) {
  return typeof url === 'string' && /^https?:\/\//i.test(url);
}
/**
 * Collects every RICH_LINK element found under the given element.
 *
 * @param {Element} element The document element to search. When falsy, the
 *     active document's Body is used.
 * @returns {Array} Array of objects, viz.
 *     {element,
 *      startOffset,
 *      endOffsetInclusive,
 *      url}
 */
function getAllRichLinks(element) {
  const root = element || DocumentApp.getActiveDocument().getBody();
  const richLinkType = DocumentApp.ElementType.RICH_LINK;
  const found = [];

  // findElement resumes after the previous match; a falsy result ends the scan.
  let match = root.findElement(richLinkType, null);
  while (match) {
    const richLink = match.getElement();
    const record = {};
    record['element'] = richLink;
    record['startOffset'] = match.getStartOffset();
    record['endOffsetInclusive'] = match.getEndOffsetInclusive();
    record['url'] = richLink.getUrl();
    found.push(record);
    match = root.findElement(richLinkType, match);
  }

  return found;
}
/**
 * Get an array of all link URLs in the document. The function is
 * recursive, and if no element is provided, it will default to
 * the active document's Body element.
 *
 * A TEXT element is scanned character by character. Each run of consecutive
 * characters carrying the same link URL becomes one record, so a
 * one-character link gets a valid range and two different links that sit
 * directly next to each other are reported separately.
 *
 * @param {Element} element The document element to operate on.
 * @returns {Array} Array of objects, viz.
 *     {element,
 *      url,
 *      startOffset,
 *      endOffsetInclusive}
 *
 * From: https://stackoverflow.com/questions/18727341/get-all-links-in-a-document
 * It's gnarly, but GDocs doesn't have the equivalent of find_all("a") in
 * BeautifulSoup.
 */
function getAllTextLinks(element) {
  const root = element || DocumentApp.getActiveDocument().getBody();

  if (root.getType() !== DocumentApp.ElementType.TEXT) {
    let numChildren;
    try {
      numChildren = root.getNumChildren();
    } catch (err) {
      // Deliberate best effort: an element that cannot report children is
      // treated as a leaf with no text links.
      return [];
    }
    let nested = [];
    for (let i = 0; i < numChildren; i++) {
      nested = nested.concat(getAllTextLinks(root.getChild(i)));
    }
    return nested;
  }

  const links = [];
  const textObj = root.editAsText();
  const text = root.getText();
  let current = null; // the link run being extended, or null outside a link

  for (let ch = 0; ch < text.length; ch++) {
    const rawUrl = textObj.getLinkUrl(ch);
    const url = rawUrl == null ? null : String(rawUrl);

    if (current !== null && current.url === url) {
      // Still inside the same link: extend its range.
      current.endOffsetInclusive = ch;
      continue;
    }

    // The link state changed here: close the previous run, if any ...
    if (current !== null) {
      links.push(current);
    }
    // ... and open a new one when this character is linked. The end offset
    // starts at ch so that a one-character link has a complete range.
    current = url === null
      ? null
      : { element: root, url: url, startOffset: ch, endOffsetInclusive: ch };
  }

  if (current !== null) {
    // The link ends on the same character that the element does.
    links.push(current);
  }
  return links;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment