Skip to content

Instantly share code, notes, and snippets.

Created May 22, 2014 20:00
Show Gist options
  • Save anonymous/0350b92759e5ea8822ed to your computer and use it in GitHub Desktop.
Save anonymous/0350b92759e5ea8822ed to your computer and use it in GitHub Desktop.
var SCRIPT_NAME = 'Broken Url Checker';
var LOG_LEVEL = 'error'; //change this to debug if you want more logging
var NOTIFY = ['sdao@propelmarketing.com'];
var SPREADSHEET_PREFIX = 'Broken Url Details'; // A timestamp is appended
var NOTIFY_ON_ERROR = ['sdao@propelmarketing.com'];
var STRIP_QUERY_STRING = true; //Drop everything after the ? in the url to speed things up
var REPORT_ON_REDIRECTS = true; //If you want to be able to track 301s and 302, turn this on
var VALID_RESPONSE_CODES = [200,301,302];
var URLS_CHECKED_FILE_NAME = 'UrlsAlreadyChecked-'+AdWordsApp.currentAccount().getCustomerId()+'.json';
var DONE_LABEL_PREFIX = 'All Urls Checked - ';
function main() {
MccApp.accounts().withLimit(50).executeInParallel('checkUrls', 'reportResults');
}
function checkUrls() {
try {
debug('Processing account: '+AdWordsApp.currentAccount().getName());
debug('Checking to see if we finished processing for today.');
var dateStr = Utilities.formatDate(new Date(), AdWordsApp.currentAccount().getTimeZone(), 'yyyy-MM-dd');
var finishedLabelName = DONE_LABEL_PREFIX+dateStr;
var alreadyDone = AdWordsApp.labels().withCondition("Name = '"+finishedLabelName+"'").get().hasNext();
if(alreadyDone) {
info('All urls have been checked for today.');
return '';
}
var labelIter = AdWordsApp.labels().withCondition("Name STARTS_WITH '"+DONE_LABEL_PREFIX+"'").get();
while(labelIter.hasNext()) { labelIter.next().remove(); }
debug('Checking for previous urls.');
var urlsAlreadyChecked = readValidUrlsFromJSON();
info('Found '+Object.keys(urlsAlreadyChecked).length+' urls already checked.');
var toReportKeywords = [];
var toReportAds = [];
var didExitEarly = false;
var keywordUrls = getKeywordUrls();
for(var key in keywordUrls) {
var kwRow = keywordUrls[key];
var url = cleanUrl(kwRow.DestinationUrl);
verifyUrl(kwRow,url,urlsAlreadyChecked,toReportKeywords);
if(shouldExitEarly()) { didExitEarly = true; break; }
}
if(!didExitEarly) {
var adUrls = getAdUrls();
for(var i in adUrls) {
var adRow = adUrls[i];
if(adRow.CreativeDestinationUrl) {
var url = cleanUrl(adRow.CreativeDestinationUrl);
verifyUrl(adRow,url,urlsAlreadyChecked,toReportAds);
}
if(adRow.ImageAdUrl) {
var url = cleanUrl(adRow.CreativeDestinationUrl);
verifyUrl(adRow,url,urlsAlreadyChecked,toReportAds);
}
if(shouldExitEarly()) { didExitEarly = true; break; }
}
}
var returnData = {
accountId : AdWordsApp.currentAccount().getCustomerId(),
accountName : AdWordsApp.currentAccount().getName(),
uniqueUrlsChecked : Object.keys(urlsAlreadyChecked).length,
brokenKeywords : toReportKeywords,
brokenAds : toReportAds,
didExitEarly : didExitEarly
};
if(didExitEarly) {
writeValidUrlsToJSON(urlsAlreadyChecked);
} else {
AdWordsApp.createLabel(finishedLabelName, 'Label created by '+SCRIPT_NAME, '#C0C0C0');
writeValidUrlsToJSON({});
}
return JSON.stringify(returnData);
} catch(e) {
// This error handling helps notify you when things don't work out well.
error(e);
if(MailApp.getRemainingDailyQuota() >= NOTIFY_ON_ERROR.length) {
var acctName = AdWordsApp.currentAccount().getName();
var acctId = AdWordsApp.currentAccount().getCustomerId();
for(var i in NOTIFY_ON_ERROR) {
info('Sending mail to: '+NOTIFY_ON_ERROR[i]);
MailApp.sendEmail(NOTIFY_ON_ERROR[i], 'ERROR: '+SCRIPT_NAME+' - '+acctName+' - ('+acctId+')', e);
}
} else {
error('Out of email quota for the day. Sending a carrier pigeon.');
}
return '';
}
function shouldExitEarly() {
return (AdWordsApp.getExecutionInfo().getRemainingTime() < 60);
}
function verifyUrl(row,url,urlsAlreadyChecked,toReport) {
if(!urlsAlreadyChecked[url]) {
info('Checking url: ' + url);
var urlCheckResults = checkUrl(url);
if(!urlCheckResults.isValid) {
row['cleanUrl'] = url;
row['responseCode'] = urlCheckResults.responseCode;
toReport.push(row);
}
urlsAlreadyChecked[url] = urlCheckResults;
} else {
if(!urlsAlreadyChecked[url].isValid) {
row['cleanUrl'] = url;
row['responseCode'] = urlsAlreadyChecked[url].responseCode;
toReport.push(row);
}
}
}
function checkUrl(url) {
var retVal = { responseCode : -1, isValid: false };
var httpOptions = {
muteHttpExceptions:true,
followRedirects:(!REPORT_ON_REDIRECTS)
};
try {
retVal.responseCode = UrlFetchApp.fetch(url, httpOptions).getResponseCode();
retVal.isValid = isValidResponseCode(retVal.responseCode);
} catch(e) {
warn(e.message);
//Something is wrong here, we should know about it.
retVal.isValid = false;
}
return retVal;
}
function isValidResponseCode(resp) {
return (VALID_RESPONSE_CODES.indexOf(resp) >= 0);
}
//Clean the url of query strings and valuetrack params
function cleanUrl(url) {
if(STRIP_QUERY_STRING) {
if(url.indexOf('?')>=0) {
url = url.split('?')[0];
}
}
if(url.indexOf('{') >= 0) {
//Let's remove the value track parameters
url = url.replace(/\{[^\}]*\}/g,'');
}
return url;
}
//Use the reporting API to pull this information because it is super fast.
//The documentation for this is here: http://goo.gl/IfMb31
function getKeywordUrls() {
var OPTIONS = { includeZeroImpressions : true };
var cols = ['CampaignId','CampaignName',
'AdGroupId','AdGroupName',
'Id','KeywordText','KeywordMatchType',
'IsNegative','DestinationUrl','Impressions'];
var report = 'KEYWORDS_PERFORMANCE_REPORT';
var query = ['select',cols.join(','),'from',report,
'where CampaignStatus = ACTIVE',
'and AdGroupStatus = ENABLED',
'and Status = ACTIVE',
'during','LAST_7_DAYS'].join(' ');
var results = {};
var reportIter = AdWordsApp.report(query, OPTIONS).rows();
while(reportIter.hasNext()) {
var row = reportIter.next();
if(row.IsNegative === 'true') { continue; }
if(!row.DestinationUrl) { continue; }
if(row.KeywordMatchType === 'Exact') {
row.KeywordText = ['[',row.KeywordText,']'].join('');
} else if(row.KeywordMatchType === 'Phrase') {
row.KeywordText = ['"',row.KeywordText,'"'].join('');
}
var rowKey = [row.CampaignId,row.AdGroupId,row.Id].join('-');
results[rowKey] = row;
}
return results;
}
//Use the reporting API to pull this information because it is super fast.
//The documentation for this is here: http://goo.gl/8RHTBj
function getAdUrls() {
var OPTIONS = { includeZeroImpressions : true };
var cols = ['CampaignId','CampaignName',
'AdGroupId','AdGroupName',
'AdType',
'Id','Headline','Description1','Description2','DisplayUrl',
'CreativeDestinationUrl','ImageAdUrl','Impressions'];
var report = 'AD_PERFORMANCE_REPORT';
var query = ['select',cols.join(','),'from',report,
'where CampaignStatus = ACTIVE',
'and AdGroupStatus = ENABLED',
'and Status = ENABLED',
'during','TODAY'].join(' ');
var results = {};
var reportIter = AdWordsApp.report(query, OPTIONS).rows();
while(reportIter.hasNext()) {
var row = reportIter.next();
if(!row.CreativeDestinationUrl || !row.ImageAdUrl) { continue; }
var rowKey = [row.CampaignId,row.AdGroupId,row.Id].join('-');
if(row.ImageAdUrl !== '--') {
row.ImageAdUrl = ('https://tpc.googlesyndication.com/pageadimg/imgad?id='+row.ImageAdUrl);
} else {
row.ImageAdUrl = '';
}
results[rowKey] = row;
}
return results;
}
//This function quickly writes the url data to a file
//that can be loaded again for the next run
function writeValidUrlsToJSON(toWrite) {
var file = getFile(URLS_CHECKED_FILE_NAME,false);
file.setContent(JSON.stringify(toWrite));
}
//And this loads that stored file and converts it to an object
function readValidUrlsFromJSON() {
var file = getFile(URLS_CHECKED_FILE_NAME,false);
var fileData = file.getBlob().getDataAsString();
if(fileData) {
return JSON.parse(fileData);
} else {
return {};
}
}
}
//This is the callback function that collects all the data from the scripts
//that were run in parallel on each account. More details can be found here:
// http://goo.gl/BvOPZo
function reportResults(responses) {
var summaryEmailData = [];
var dateTimeStr = Utilities.formatDate(new Date(), AdWordsApp.currentAccount().getTimeZone(), 'yyyy-MM-dd HH:m:s');
var spreadsheetName = SPREADSHEET_PREFIX+' - '+dateTimeStr;
for(var i in responses) {
if(!responses[i].getReturnValue()) { continue; }
var res = JSON.parse(responses[i].getReturnValue());
var sheetUrl = writeResultsToSpreadsheet(res,spreadsheetName);
summaryEmailData.push({accountId:res.accountId,
accountName:res.accountName,
didExitEarly:res.didExitEarly,
uniqueUrlsChecked:res.uniqueUrlsChecked,
numBrokenKeywords:res.brokenKeywords.length,
numBrokenAds:res.brokenAds.length,
sheetUrl: sheetUrl});
}
if(summaryEmailData.length > 0) {
sendSummaryEmail(summaryEmailData);
}
function writeResultsToSpreadsheet(res,name) {
var file = getFile(name,true);
var spreadsheet;
var maxRetries = 0;
while(maxRetries < 3) {
try {
spreadsheet = SpreadsheetApp.openById(file.getId());
break;
} catch(e) {
maxRetries++;
Utilities.sleep(1000);
}
}
if(!spreadsheet) { throw 'Could not open file: '+name; }
if(spreadsheet.getSheetByName('Sheet1')) {
spreadsheet.getSheetByName('Sheet1').setName(res.accountId);
}
var sheet = spreadsheet.getSheetByName(res.accountId);
if(!sheet) {
sheet = spreadsheet.insertSheet(res.accountId, spreadsheet.getSheets().length);
}
var toWrite = [['Type','Clean Url','Response Code','Campaign Name','AdGroup Name','Text','Full Url']];
for(var i in res.brokenKeywords) {
var row = res.brokenKeywords[i];
toWrite.push(['Keyword',
row.cleanUrl,
row.responseCode,
row.CampaignName,
row.AdGroupName,
row.KeywordText,
row.DestinationUrl]);
}
for(var i in res.brokenAds) {
var row = res.brokenAds[i];
toWrite.push([row.AdType,
row.cleanUrl,
row.responseCode,
row.CampaignName,
row.AdGroupName,
(row.Headline) ? [row.Headline,row.Description1,row.Description2,row.DisplayUrl].join('|') : '',
(row.CreativeDestinationUrl) ? row.CreativeDestinationUrl : row.ImageAdUrl]);
}
var lastRow = sheet.getLastRow();
var numRows = sheet.getMaxRows();
if((numRows-lastRow) < toWrite.length) {
sheet.insertRowsAfter(lastRow,toWrite.length-numRows+lastRow);
}
var range = sheet.getRange(lastRow+1,1,toWrite.length,toWrite[0].length);
range.setValues(toWrite);
if((sheet.getMaxColumns() - sheet.getLastColumn()) > 0) {
sheet.deleteColumns(sheet.getLastColumn()+1, sheet.getMaxColumns() - sheet.getLastColumn());
}
file = DriveApp.getFileById(spreadsheet.getId());
try {
file.setSharing(DriveApp.Access.ANYONE_WITH_LINK, DriveApp.Permission.VIEW);
} catch(e) {
file.setSharing(DriveApp.Access.DOMAIN_WITH_LINK, DriveApp.Permission.VIEW);
}
//This gives you a link directly to the spreadsheet sheet.
return (spreadsheet.getUrl() + '#gid=' + sheet.getSheetId());
}
//This function builds the summary email and sends it to the people in
//the NOTIFY list
function sendSummaryEmail(summaryEmailData) {
var subject = SCRIPT_NAME+' Summary Results';
var body = subject;
var htmlBody = '<html><body>'+subject;
htmlBody += '<br/ >Should strip query strings: '+STRIP_QUERY_STRING;
htmlBody += '<br/ >Report on redirects: '+REPORT_ON_REDIRECTS;
htmlBody += '<br/ >Valid response codes: '+VALID_RESPONSE_CODES;
htmlBody += '<br/ ><br/ >';
htmlBody += '<table border="1" width="95%" style="border-collapse:collapse;">';
htmlBody += '<tr>';
htmlBody += '<td align="left"><b>Acct Id</b></td>';
htmlBody += '<td align="left"><b>Acct Name</b></td>';
htmlBody += '<td align="left"><b>Exited Early</b></td>';
htmlBody += '<td align="center"><b>Unique Urls Checked</b></td>';
htmlBody += '<td align="center"><b># Broken Keyword Urls</b></td>';
htmlBody += '<td align="center"><b># Broken Ad Urls</b></td>';
htmlBody += '<td align="center"><b>Full Report</b></td>';
htmlBody += '</tr>';
for(var i in summaryEmailData) {
var row = summaryEmailData[i];
htmlBody += '<tr><td align="left">'+ row.accountId +
'</td><td align="left">' + row.accountName +
'</td><td align="left">' + row.didExitEarly +
'</td><td align="center">' + row.uniqueUrlsChecked +
'</td><td align="center">' + row.numBrokenKeywords +
'</td><td align="center">' + row.numBrokenAds +
'</td><td align="left"><a href="'+row.sheetUrl+'">' + 'Show Details' +
'</a></td></tr>';
}
htmlBody += '</table>';
htmlBody += '<br/ >';
htmlBody += Utilities.formatDate(new Date(),AdWordsApp.currentAccount().getTimeZone(),'MMMM dd, yyyy @ hh:mma z');
htmlBody += '. Completed. '+Object.keys(summaryEmailData).length+' Accounts checked.';
htmlBody += '</body></html>';
var options = { htmlBody : htmlBody };
for(var i in NOTIFY) {
MailApp.sendEmail(NOTIFY[i], subject, body, options);
}
}
}
//This function finds a given file on Google Drive
//If it does not exist, it creates a new file
//if isSpreadsheet is set, it will create a new spreadsheet
//otherwise, it creates a text file.
function getFile(fileName,isSpreadsheet) {
var maxRetries = 0;
var errors = [];
while(maxRetries < 3) {
try {
var fileIter = DriveApp.getFilesByName(fileName);
if(!fileIter.hasNext()) {
info('Could not find file: '+fileName+' on Google Drive. Creating new file.');
if(isSpreadsheet) {
return SpreadsheetApp.create(fileName);
} else {
return DriveApp.createFile(fileName,'');
}
} else {
return fileIter.next();
}
} catch(e) {
errors.push(e);
maxRetries++;
Utilities.sleep(1000);
}
}
if(maxRetries == 3) {
throw errors.join('. ');
}
}
//Some functions to help with logging
var LOG_LEVELS = { 'error':1, 'warn':2, 'info':3, 'debug':4 };
function error(msg) { if(LOG_LEVELS['error'] <= LOG_LEVELS[LOG_LEVEL]) { log('ERROR',msg); } }
function warn(msg) { if(LOG_LEVELS['warn'] <= LOG_LEVELS[LOG_LEVEL]) { log('WARN' ,msg); } }
function info(msg) { if(LOG_LEVELS['info'] <= LOG_LEVELS[LOG_LEVEL]) { log('INFO' ,msg); } }
function debug(msg) { if(LOG_LEVELS['debug'] <= LOG_LEVELS[LOG_LEVEL]) { log('DEBUG',msg); } }
function log(type,msg) { Logger.log(type + ' - ' + msg); }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment