Skip to content

Instantly share code, notes, and snippets.

@crewstyle
Last active March 30, 2017 22:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save crewstyle/ab9856f53f38f96cd24049e5263f07e4 to your computer and use it in GitHub Desktop.
Save crewstyle/ab9856f53f38f96cd24049e5263f07e4 to your computer and use it in GitHub Desktop.
Google Spreadsheet - How to get data from URL's headers, using CacheService and UrlFetchApp class services
/**
* This script has been especially made for a particular context.
* So use it with caution, and do not forget to customize it before any use.
*/
//globals
var sheet = SpreadsheetApp.getActiveSpreadsheet(),
sheets = sheet.getSheets();
/**
* Retrieve header Response Code and Location from cache.
*/
function getDataFromCache(url) {
var encodedUrl = encodeURIComponent(url),
cache = CacheService.getPublicCache();
//get data from cache
var details = cache.get(encodedUrl);
//check cache
if (details != null && details != []) {
return details;
}
//get details from domainname.ext and www.domainname.ext
var data = getUrlDetails(url, false),
datawww = getUrlDetails(url, true);
//redefine details
details = [];
//store details
if (data != null && datawww != null) {
details.push(data.getResponseCode());
details.push(data.getHeaders()['Location']);
details.push(datawww.getResponseCode());
details.push(datawww.getHeaders()['Location']);
}
//cache will be good for around 3600 seconds (1 hour)
cache.put(encodedUrl, details, 3600);
return details;
}
/**
* Retrieve header Response Code and Location from URL depending on the subdomain "www".
* All URLs are in normal HTTP protocol. No Secure protocol allowed.
*/
function getUrlDetails(url, www) {
var fetch;
try {
fetch = UrlFetchApp.fetch("http://" + (www ? "www." : "") + url, {
followRedirects: false,
muteHttpExceptions: true
});
} catch(e) {}
return fetch;
}
/**
* Update URLS
*/
function main()
{
//configurations
var lineNumber = 100,
loadingMsg = "...",
currentSheet = sheets[2];
//cells
var cellmin = currentSheet.getRange("M2"),
cellmax = currentSheet.getRange("N2");
//check min
var min = parseInt(cellmin.getValue(), 10),
max = parseInt(cellmax.getValue(), 10);
//works on min
min = 4 > min ? 4 : min;
cellmin.setValue(min);
//works on max
max = (min > max || max > (min + lineNumber) || max > 978) ? (min + lineNumber) : max;
cellmax.setValue(max);
//vars
var val, check, cellh;
//check URLS
for (var i = min, len = max; i <= len; i++) {
cellmax.setValue(i);
cellh = currentSheet.getRange("H"+i);
//get the value
val = currentSheet.getRange("B"+i).getValue();
check = cellh.getValue();
//works on val
val = loadingMsg == val ? "" : val;
//check value
if ("" == val || "" != check) {
continue;
}
//loading
currentSheet.getRange("H"+i).setValue(loadingMsg);
//get complete url with schema
var completeurl = "http://www." + val + "/";
//get url headers for domain name w/out subdomain
//details = [responseCode, location, wwwResponseCode, wwwLocation];
var details = getDataFromCache(val);
Logger.log(details);
//loading
cellh.setValue("");
//check values
if (4 == details.length) {
//update data
cellh.setValue(details[0]);
currentSheet.getRange("I"+i).setValue(details[1]);
currentSheet.getRange("J"+i).setValue(details[1] == completeurl ? "OK" : "KO");
//update www.data
currentSheet.getRange("K"+i).setValue(details[2]);
currentSheet.getRange("L"+i).setValue(details[3]);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment