-
-
Save abfo/d4cc2f3914eddc35d24467028deec2c5 to your computer and use it in GitHub Desktop.
var AlertEmail = ''; | |
var SitemapUrl = ''; | |
var SearchConsoleProperty = ''; | |
var WriteRetryCount = 3; | |
function onOpen() { | |
var ui = SpreadsheetApp.getUi(); | |
ui.createMenu('Search Console') | |
.addItem('Update Data', 'runUpdate') | |
.addToUi(); | |
} | |
function runUpdate() { | |
var pages = new Map(); | |
Logger.log('Loading known pages...'); | |
readPages(pages); | |
Logger.log('Adding any new pages from sitemap...'); | |
updatePagesFromSitemap(pages); | |
Logger.log('Updating page data from search console...'); | |
updatePagesFromGsc(pages); | |
Logger.log('Writing pages to sheet...'); | |
writePages(pages); | |
Logger.log('Calculate coverage (optional, need coverage sheet)...'); | |
calculateCoverage(pages); | |
} | |
function updatePagesFromGsc(pages){ | |
var changes = []; | |
var day = new Date().getHours(); | |
for(const[key, value] of pages) { | |
try { | |
if (value.checkDay != day) { | |
continue; | |
} | |
var request = { | |
"inspectionUrl": value.url, | |
"siteUrl": SearchConsoleProperty | |
}; | |
var response = UrlFetchApp.fetch('https://searchconsole.googleapis.com/v1/urlInspection/index:inspect', { | |
headers: { | |
Authorization: 'Bearer ' + ScriptApp.getOAuthToken() | |
}, | |
'method' : 'post', | |
'contentType' : 'application/json', | |
'payload' : JSON.stringify(request, null, 2) | |
}); | |
var json = JSON.parse(response.getContentText()); | |
var oldCoverageState = value.coverageState; | |
var newCoverageState = json.inspectionResult.indexStatusResult.coverageState; | |
if (oldCoverageState != newCoverageState) { | |
changes.push(value.url + ' indexStatusResult changed to ' + newCoverageState + ' from ' + oldCoverageState); | |
} | |
value.verdict = json.inspectionResult.indexStatusResult.verdict; | |
value.coverageState = newCoverageState; | |
value.robotsTxtState = json.inspectionResult.indexStatusResult.robotsTxtState; | |
value.indexingState = json.inspectionResult.indexStatusResult.indexingState; | |
value.lastCrawlTime = json.inspectionResult.indexStatusResult.lastCrawlTime; | |
value.pageFetchState = json.inspectionResult.indexStatusResult.pageFetchState; | |
value.googleCanonical = json.inspectionResult.indexStatusResult.googleCanonical; | |
value.userCanonical = json.inspectionResult.indexStatusResult.userCanonical; | |
} catch (err) { | |
Logger.log('Error processing url ' + err); | |
} | |
} | |
if (changes.length > 0) { | |
MailApp.sendEmail(AlertEmail, 'Google Search Console Monitor found changes on ' + new Date(), changes.join('\r\n')); | |
} | |
} | |
function updatePagesFromSitemap(pages) { | |
var sitemapXml = UrlFetchApp.fetch(SitemapUrl).getContentText(); | |
var sitemapDoc = XmlService.parse(sitemapXml); | |
var root = sitemapDoc.getRootElement(); | |
var ns = XmlService.getNamespace('http://www.sitemaps.org/schemas/sitemap/0.9'); | |
var entries = root.getChildren('url', ns); | |
for (var i = 0; i < entries.length; i++) { | |
var loc = entries[i].getChild('loc', ns).getText(); | |
var lastmod = ' '; | |
var lastmodElement = entries[i].getChild('lastmod', ns); | |
if (lastmodElement) { | |
lastmod = lastmodElement.getText(); | |
} | |
if(!pages.has(loc)) { | |
var page = {}; | |
page.url = loc; | |
page.lastmod = lastmod; | |
page.verdict = ' '; | |
page.coverageState = 'Added from sitemap'; | |
page.robotsTxtState = ' '; | |
page.indexingState = ' '; | |
page.lastCrawlTime = ' '; | |
page.pageFetchState = ' '; | |
page.googleCanonical = ' '; | |
page.userCanonical = ' '; | |
page.checkDay = getRandomInt(24); | |
pages.set(loc, page); | |
} | |
} | |
} | |
function readPages(pages) { | |
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('gsc'); | |
var lastRow = sheet.getLastRow(); | |
var range = sheet.getRange(2, 1, lastRow, 11).getValues(); | |
for (var row = 0; row <= (lastRow - 1); row++) { | |
var page = {}; | |
page.url = range[row][0]; | |
page.lastmod = range[row][1]; | |
page.verdict = range[row][2]; | |
page.coverageState = range[row][3]; | |
page.robotsTxtState = range[row][4]; | |
page.indexingState = range[row][5]; | |
page.lastCrawlTime = range[row][6]; | |
page.pageFetchState = range[row][7]; | |
page.googleCanonical = range[row][8]; | |
page.userCanonical = range[row][9]; | |
page.checkDay = range[row][10]; | |
pages.set(page.url, page); | |
} | |
} | |
function writePages(pages) { | |
for(var r = 1; r <= WriteRetryCount; r++){ | |
try { | |
writePagesNoRetry(pages); | |
break; | |
} catch (err) { | |
if (r == WriteRetryCount) { | |
throw err; | |
} else { | |
Utilities.sleep(5000); | |
} | |
} | |
} | |
} | |
function writePagesNoRetry(pages) { | |
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('gsc'); | |
// make sure we have enough space | |
var rowsNeeded = pages.size + 1; | |
var rowsAvailable = sheet.getMaxRows(); | |
var rowsToAdd = rowsNeeded - rowsAvailable; | |
while(rowsToAdd >= 0) { | |
sheet.appendRow([' ']); | |
rowsToAdd--; | |
} | |
sheet.clear(); | |
sheet.appendRow(['URL', | |
'Last Modified', | |
'Verdict', | |
'Coverage State', | |
'ROBOTS.TXT State', | |
'Indexing State', | |
'Last Crawl Time', | |
'Page Fetch State', | |
'Google Canonical', | |
'User Canonical', | |
'Check Day']); | |
sheetValues = []; | |
for(const[key, value] of pages) { | |
sheetValues.push([value.url, | |
value.lastmod, | |
value.verdict, | |
value.coverageState, | |
value.robotsTxtState, | |
value.indexingState, | |
value.lastCrawlTime, | |
value.pageFetchState, | |
value.googleCanonical, | |
value.userCanonical, | |
value.checkDay]); | |
} | |
sheet.getRange(2, 1, sheetValues.length, 11).setValues(sheetValues); | |
} | |
function getRandomInt(max) { | |
return Math.floor(Math.random() * max); | |
} | |
function calculateCoverage(pages) { | |
var hour = new Date().getHours(); | |
if (hour == 20) { | |
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('coverage'); | |
if (sheet) { | |
var total = 0; | |
var indexed = 0; | |
for(const[key, value] of pages) { | |
total++; | |
if ((value.coverageState == 'Submitted and indexed') | |
|| (value.coverageState == 'Indexed, not submitted in sitemap') | |
|| (value.coverageState == 'Duplicate, submitted URL not selected as canonical')) { | |
indexed++; | |
} | |
} | |
sheet.appendRow([new Date(), total, indexed, indexed / total]); | |
} | |
} | |
} |
I see that people are struggling with auth and my suggestion would be to deal with the auth via manifest file by adding
{
"timeZone": "America/New_York",
"exceptionLogging": "STACKDRIVER",
"oauthScopes": ["https://www.googleapis.com/auth/script.external_request", "https://www.googleapis.com/auth/webmasters", "https://www.googleapis.com/auth/spreadsheets.currentonly", "https://www.googleapis.com/auth/script.send_mail"],
"runtimeVersion": "V8"
}
to the manifest file and getting the token with a single call var token = ScriptApp.getOAuthToken();
I modified few more things and works fine you can find it here https://gist.github.com/emrecan-s/1aebc0fdb74baa87f598c04314f5acca
Thanks, both changes included in the updated version, together with some better sheet read/write code that speeds the script up a lot.
Thanks, also I am working on the batch call option as the script can't make 2k API calls with for loop. If you have any past experience with batch calls, any help would be appreciated.
Thanks for this, there is one small issue => Missing ' on var
SitemapUrl = ';