Skip to content

Instantly share code, notes, and snippets.

@mvogelgesang
Created February 20, 2021 19:25
Show Gist options
  • Save mvogelgesang/f92d45915a82c7bf872f25b40f0c64cb to your computer and use it in GitHub Desktop.
Save mvogelgesang/f92d45915a82c7bf872f25b40f0c64cb to your computer and use it in GitHub Desktop.
Site Scanner
/**
* A series of custom Google Sheets functions for scanning a spreadsheet of website URLs
* @author mvogelgesang http://github.com/mvogelgesang
*/
/**
* Checks to see if website has an /aboutus, /about-us, or /about page.
*
* All three candidate URLs are requested in a single batch; the site is considered
* to have an about page when at least one of the responses has HTTP status 200.
* Trailing slashes on the site URL are removed before the paths are appended so that
* "gsa.gov/" is probed as "https://gsa.gov/about" rather than "https://gsa.gov//about".
*
* @param {URL} siteURL - URL of the site
* @return {boolean} - True if any candidate page responded with 200, otherwise False
* @customfunction
*/
function ABOUTPAGE(siteURL) {
  // Normalise the base first so path concatenation never yields a double slash.
  const baseUrl = validateUrl(siteURL).replace(/\/+$/, '');
  const candidatePaths = ['/aboutus', '/about-us', '/about'];
  const requests = candidatePaths.map((path) => ({
    'url': baseUrl + path,
    // Non-2xx responses must come back as response objects instead of throwing,
    // otherwise a single 404 would abort the whole batch.
    'muteHttpExceptions': true
  }));
  const responses = UrlFetchApp.fetchAll(requests);
  return responses.some((response) => response.getResponseCode() === 200);
}
/**
* Returns the redirect target announced by a given starting URL.
*
* A single request is made with redirects disabled, and the value of that response's
* Location header is returned. If the target itself redirects again, that second hop
* is not followed.
*
* @param {URL} siteURL - URL of the site
* @returns {string} value of the Location header, or an empty string when the response has none
* @customfunction
*/
function REDIRECTLOCATION(siteURL) {
  const params = {
    'followRedirects': false,
    'muteHttpExceptions': true
  };
  const headers = callAPI(siteURL, 'headers', params) || {};
  // HTTP header names are case-insensitive, so accept "location", "LOCATION", etc.
  // in addition to the canonical "Location" spelling.
  const locationKey = Object.keys(headers).find(
    (name) => name.toLowerCase() === 'location' && headers[name]
  );
  return locationKey === undefined ? "" : headers[locationKey];
}
/**
* Searches a given page for "login" or "log in" (any capitalisation, e.g. "Log In", "LOGIN").
*
* The page is fetched with HTTP exceptions muted, so the body of an error response
* (for example a 401/403 page that shows a login form) is searched as well instead of
* the function failing.
*
* @param {url} siteURL - url of website to test
* @return {boolean} True if the page content contains the text, otherwise False
* @customfunction
*/
function HASLOGIN(siteURL) {
  const pageText = callAPI(siteURL, 'content', { 'muteHttpExceptions': true });
  // Case-insensitive, optional single space: matches "login", "Login", "Log In", "LOG IN", ...
  const loginRegex = /log ?in/i;
  return loginRegex.test(pageText);
}
/**
* Reports the HTTP status code a URL responds with. Redirects are followed unless told otherwise.
*
* @param {url} siteURL - url of website to test
* @param {boolean} followRedirects - [optional] - (Default = TRUE). TRUE to follow redirects and report the final HTTP Response. FALSE to return first HTTP Response.
* @return {number} HTTP response code
* @customfunction
*/
function RESPONSECODE(siteURL, followRedirects = true) {
  // Error statuses are muted so they are returned as a code rather than thrown.
  const fetchOptions = { followRedirects, muteHttpExceptions: true };
  return callAPI(siteURL, 'responseCode', fetchOptions);
}
/**
* Submits a GET request and returns the requested part of the response.
*
* The URL is passed through validateUrl before the request is made.
*
* @param {string} url - The api endpoint
* @param {string} resultType - [Optional] Which part of the response to return: "content" (default), "headers" or "responseCode"
* @param {object} params - [Optional] Any key value pairs that are specific to the UrlFetchApp class (https://developers.google.com/apps-script/reference/url-fetch/url-fetch-app#fetch(String,Object))
* @param {boolean} json - [Optional] When truthy and resultType is "content", the body is parsed as JSON and the parsed value is returned
*
* @returns {string|object|number} body text (or parsed JSON), headers object, or response code
* @throws {Error} when resultType is not one of the supported values (checked before any request is sent)
*/
function callAPI(url, resultType = "content", params = {}, json = false) {
  const supportedTypes = ['content', 'headers', 'responseCode'];
  // Fail fast on a typo in resultType instead of spending a request and
  // silently handing back undefined.
  if (!supportedTypes.includes(resultType)) {
    throw new Error(
      'callAPI: unsupported resultType "' + resultType +
      '" (expected one of: ' + supportedTypes.join(', ') + ')'
    );
  }

  const targetUrl = validateUrl(url);
  const response = UrlFetchApp.fetch(targetUrl, params);

  switch (resultType) {
    case 'headers':
      return response.getHeaders();
    case 'responseCode':
      return response.getResponseCode();
    default: {
      // 'content'
      const body = response.getContentText();
      return json ? JSON.parse(body) : body;
    }
  }
}
/**
* Given a URL (gsa.gov, http://gsa.gov, https://gsa.gov), returns a string with a fully qualified https protocol and domain.
*
* Only a scheme at the very start of the value is inspected: a leading "http://" is
* upgraded to "https://", a leading "https://" is kept as-is, and anything else gets
* "https://" prepended. Occurrences of "http://" or "https://" later in the string
* (for example inside a query parameter) are left untouched. Surrounding whitespace is
* trimmed, and null/undefined are treated as an empty string.
*
* @param {string} url
* @returns {string} complete https url
*/
function validateUrl(url) {
  const raw = url == null ? '' : String(url).trim();
  // Anchored so that only the URL's own scheme is rewritten, never embedded URLs.
  const upgraded = raw.replace(/^http:\/\//i, 'https://');
  if (/^https:\/\//i.test(upgraded)) {
    return upgraded;
  }
  return 'https://' + upgraded;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment