Created
February 20, 2021 19:25
-
-
Save mvogelgesang/f92d45915a82c7bf872f25b40f0c64cb to your computer and use it in GitHub Desktop.
Site Scanner
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * A series of custom Google Sheets functions for scanning a spreadsheet of websites.
 * @author mvogelgesang http://github.com/mvogelgesang
 */
/**
 * Checks whether a website serves an /aboutus, /about-us, or /about page.
 *
 * The three candidate URLs are requested in a single UrlFetchApp.fetchAll
 * batch with HTTP exceptions muted, so non-2xx responses are inspected
 * rather than thrown.
 *
 * @param {string} siteURL - URL of the site, with or without a protocol
 * @return {boolean} true if at least one candidate page answers with HTTP 200
 * @customfunction
 */
function ABOUTPAGE(siteURL) {
  // Drop trailing slashes so "example.com/" yields ".../about", not "...//about".
  const baseUrl = validateUrl(siteURL).replace(/\/+$/, '');
  const candidatePaths = ['/aboutus', '/about-us', '/about'];
  const requests = candidatePaths.map((path) => ({
    url: baseUrl + path,
    muteHttpExceptions: true,
  }));
  const responses = UrlFetchApp.fetchAll(requests);
  return responses.some((response) => response.getResponseCode() === 200);
}
/**
 * Returns the redirect target announced by a URL's first response.
 *
 * The request is made with redirects disabled, so the value is the Location
 * header of the initial response, not the end of a redirect chain.
 *
 * @param {string} siteURL - URL of the site
 * @returns {string} value of the Location header, or "" when none is present
 * @customfunction
 */
function REDIRECTLOCATION(siteURL) {
  const headers = callAPI(siteURL, 'headers', {
    followRedirects: false,
    muteHttpExceptions: true,
  });
  // HTTP header names are case-insensitive, so accept "location" as well as "Location".
  const locationKey = Object.keys(headers).find(
    (name) => name.toLowerCase() === 'location'
  );
  if (locationKey === undefined || !headers[locationKey]) {
    return '';
  }
  return headers[locationKey];
}
/**
 * Searches a page's content for the text "login" or "log in", in any letter case.
 *
 * @param {string} siteURL - URL of the website to test
 * @return {boolean} true if the fetched page content contains the text, false otherwise
 * @customfunction
 */
function HASLOGIN(siteURL) {
  const pageContent = callAPI(siteURL);
  // Case-insensitive so that "Log In", "LOGIN", etc. are detected too.
  return /log ?in/i.test(pageContent);
}
/**
 * Given a URL, returns the HTTP response code. By default, redirects are followed.
 *
 * @param {string} siteURL - URL of the website to test
 * @param {boolean} followRedirects - [optional] (Default = TRUE). TRUE to follow redirects
 *   and report the final HTTP response code. FALSE to report the first response code.
 *   A blank value falls back to TRUE; the text "false" (any case) is treated as FALSE.
 * @return {number} HTTP response code
 * @customfunction
 */
function RESPONSECODE(siteURL, followRedirects = true) {
  let shouldFollow;
  if (followRedirects === '' || followRedirects === null) {
    // A reference to an empty cell is expected to arrive as "" rather than
    // undefined, which would bypass the default parameter value.
    shouldFollow = true;
  } else if (typeof followRedirects === 'string') {
    shouldFollow = followRedirects.trim().toLowerCase() !== 'false';
  } else {
    shouldFollow = Boolean(followRedirects);
  }
  return callAPI(siteURL, 'responseCode', {
    followRedirects: shouldFollow,
    muteHttpExceptions: true,
  });
}
/**
 * Submits a GET request and returns the requested part of the response.
 *
 * @param {string} url - The endpoint; it is passed through validateUrl before fetching
 * @param {string} resultType - [Optional] Which part of the response to return:
 *   "content" (default), "headers", or "responseCode"
 * @param {object} params - [Optional] Options forwarded unchanged to UrlFetchApp.fetch
 *   (https://developers.google.com/apps-script/reference/url-fetch/url-fetch-app#fetch(String,Object))
 * @param {boolean} json - [Optional] When true and resultType is "content", the body is
 *   parsed with JSON.parse; ignored for the other result types
 *
 * @returns {string|object|number} body text (or parsed JSON), headers object, or status code
 * @throws {Error} if resultType is not one of the supported values
 */
function callAPI(url, resultType = 'content', params = {}, json = false) {
  const supportedTypes = ['content', 'headers', 'responseCode'];
  // Reject typos such as "header" up front instead of spending a fetch and
  // silently returning undefined.
  if (!supportedTypes.includes(resultType)) {
    throw new Error(
      'Unsupported resultType "' + resultType + '"; expected one of: ' + supportedTypes.join(', ')
    );
  }
  const response = UrlFetchApp.fetch(validateUrl(url), params);
  if (resultType === 'headers') {
    return response.getHeaders();
  }
  if (resultType === 'responseCode') {
    return response.getResponseCode();
  }
  const bodyText = response.getContentText();
  return json ? JSON.parse(bodyText) : bodyText;
}
/**
 * Given a URL (gsa.gov, http://gsa.gov, https://gsa.gov), returns a string with a
 * fully qualified https protocol and domain.
 *
 * Only a scheme at the very start of the input is rewritten; any "http://" or
 * "https://" appearing later (for example inside a query string) is left as is.
 * Surrounding whitespace is removed.
 *
 * @param {string} url - URL with or without an http/https scheme
 * @returns {string} the same URL with an "https://" scheme
 * @throws {TypeError} if url is not a string
 */
function validateUrl(url) {
  if (typeof url !== 'string') {
    throw new TypeError('validateUrl expects a string URL but received ' + typeof url);
  }
  // Strip a leading http:// or https:// (any letter case), then always prepend https://.
  const withoutScheme = url.trim().replace(/^https?:\/\//i, '');
  return 'https://' + withoutScheme;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment