Last active
April 25, 2022 08:35
-
-
Save abfo/d4cc2f3914eddc35d24467028deec2c5 to your computer and use it in GitHub Desktop.
How to monitor page index status using the Google Search Console API and Apps Script in a Google Sheet. For instructions on how to configure this script please see https://ithoughthecamewithyou.com/post/monitor-page-index-status-with-google-sheets-apps-script-and-the-google-search-console-api
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var AlertEmail = ''; | |
var SitemapUrl = ''; | |
var SearchConsoleProperty = ''; | |
var WriteRetryCount = 3; | |
function onOpen() { | |
var ui = SpreadsheetApp.getUi(); | |
ui.createMenu('Search Console') | |
.addItem('Update Data', 'runUpdate') | |
.addToUi(); | |
} | |
function runUpdate() { | |
var pages = new Map(); | |
Logger.log('Loading known pages...'); | |
readPages(pages); | |
Logger.log('Adding any new pages from sitemap...'); | |
updatePagesFromSitemap(pages); | |
Logger.log('Updating page data from search console...'); | |
updatePagesFromGsc(pages); | |
Logger.log('Writing pages to sheet...'); | |
writePages(pages); | |
Logger.log('Calculate coverage (optional, need coverage sheet)...'); | |
calculateCoverage(pages); | |
} | |
function updatePagesFromGsc(pages){ | |
var changes = []; | |
var day = new Date().getHours(); | |
for(const[key, value] of pages) { | |
try { | |
if (value.checkDay != day) { | |
continue; | |
} | |
var request = { | |
"inspectionUrl": value.url, | |
"siteUrl": SearchConsoleProperty | |
}; | |
var response = UrlFetchApp.fetch('https://searchconsole.googleapis.com/v1/urlInspection/index:inspect', { | |
headers: { | |
Authorization: 'Bearer ' + ScriptApp.getOAuthToken() | |
}, | |
'method' : 'post', | |
'contentType' : 'application/json', | |
'payload' : JSON.stringify(request, null, 2) | |
}); | |
var json = JSON.parse(response.getContentText()); | |
var oldCoverageState = value.coverageState; | |
var newCoverageState = json.inspectionResult.indexStatusResult.coverageState; | |
if (oldCoverageState != newCoverageState) { | |
changes.push(value.url + ' indexStatusResult changed to ' + newCoverageState + ' from ' + oldCoverageState); | |
} | |
value.verdict = json.inspectionResult.indexStatusResult.verdict; | |
value.coverageState = newCoverageState; | |
value.robotsTxtState = json.inspectionResult.indexStatusResult.robotsTxtState; | |
value.indexingState = json.inspectionResult.indexStatusResult.indexingState; | |
value.lastCrawlTime = json.inspectionResult.indexStatusResult.lastCrawlTime; | |
value.pageFetchState = json.inspectionResult.indexStatusResult.pageFetchState; | |
value.googleCanonical = json.inspectionResult.indexStatusResult.googleCanonical; | |
value.userCanonical = json.inspectionResult.indexStatusResult.userCanonical; | |
} catch (err) { | |
Logger.log('Error processing url ' + err); | |
} | |
} | |
if (changes.length > 0) { | |
MailApp.sendEmail(AlertEmail, 'Google Search Console Monitor found changes on ' + new Date(), changes.join('\r\n')); | |
} | |
} | |
function updatePagesFromSitemap(pages) { | |
var sitemapXml = UrlFetchApp.fetch(SitemapUrl).getContentText(); | |
var sitemapDoc = XmlService.parse(sitemapXml); | |
var root = sitemapDoc.getRootElement(); | |
var ns = XmlService.getNamespace('http://www.sitemaps.org/schemas/sitemap/0.9'); | |
var entries = root.getChildren('url', ns); | |
for (var i = 0; i < entries.length; i++) { | |
var loc = entries[i].getChild('loc', ns).getText(); | |
var lastmod = ' '; | |
var lastmodElement = entries[i].getChild('lastmod', ns); | |
if (lastmodElement) { | |
lastmod = lastmodElement.getText(); | |
} | |
if(!pages.has(loc)) { | |
var page = {}; | |
page.url = loc; | |
page.lastmod = lastmod; | |
page.verdict = ' '; | |
page.coverageState = 'Added from sitemap'; | |
page.robotsTxtState = ' '; | |
page.indexingState = ' '; | |
page.lastCrawlTime = ' '; | |
page.pageFetchState = ' '; | |
page.googleCanonical = ' '; | |
page.userCanonical = ' '; | |
page.checkDay = getRandomInt(24); | |
pages.set(loc, page); | |
} | |
} | |
} | |
function readPages(pages) { | |
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('gsc'); | |
var lastRow = sheet.getLastRow(); | |
var range = sheet.getRange(2, 1, lastRow, 11).getValues(); | |
for (var row = 0; row <= (lastRow - 1); row++) { | |
var page = {}; | |
page.url = range[row][0]; | |
page.lastmod = range[row][1]; | |
page.verdict = range[row][2]; | |
page.coverageState = range[row][3]; | |
page.robotsTxtState = range[row][4]; | |
page.indexingState = range[row][5]; | |
page.lastCrawlTime = range[row][6]; | |
page.pageFetchState = range[row][7]; | |
page.googleCanonical = range[row][8]; | |
page.userCanonical = range[row][9]; | |
page.checkDay = range[row][10]; | |
pages.set(page.url, page); | |
} | |
} | |
function writePages(pages) { | |
for(var r = 1; r <= WriteRetryCount; r++){ | |
try { | |
writePagesNoRetry(pages); | |
break; | |
} catch (err) { | |
if (r == WriteRetryCount) { | |
throw err; | |
} else { | |
Utilities.sleep(5000); | |
} | |
} | |
} | |
} | |
function writePagesNoRetry(pages) { | |
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('gsc'); | |
// make sure we have enough space | |
var rowsNeeded = pages.size + 1; | |
var rowsAvailable = sheet.getMaxRows(); | |
var rowsToAdd = rowsNeeded - rowsAvailable; | |
while(rowsToAdd >= 0) { | |
sheet.appendRow([' ']); | |
rowsToAdd--; | |
} | |
sheet.clear(); | |
sheet.appendRow(['URL', | |
'Last Modified', | |
'Verdict', | |
'Coverage State', | |
'ROBOTS.TXT State', | |
'Indexing State', | |
'Last Crawl Time', | |
'Page Fetch State', | |
'Google Canonical', | |
'User Canonical', | |
'Check Day']); | |
sheetValues = []; | |
for(const[key, value] of pages) { | |
sheetValues.push([value.url, | |
value.lastmod, | |
value.verdict, | |
value.coverageState, | |
value.robotsTxtState, | |
value.indexingState, | |
value.lastCrawlTime, | |
value.pageFetchState, | |
value.googleCanonical, | |
value.userCanonical, | |
value.checkDay]); | |
} | |
sheet.getRange(2, 1, sheetValues.length, 11).setValues(sheetValues); | |
} | |
function getRandomInt(max) { | |
return Math.floor(Math.random() * max); | |
} | |
function calculateCoverage(pages) { | |
var hour = new Date().getHours(); | |
if (hour == 20) { | |
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('coverage'); | |
if (sheet) { | |
var total = 0; | |
var indexed = 0; | |
for(const[key, value] of pages) { | |
total++; | |
if ((value.coverageState == 'Submitted and indexed') | |
|| (value.coverageState == 'Indexed, not submitted in sitemap') | |
|| (value.coverageState == 'Duplicate, submitted URL not selected as canonical')) { | |
indexed++; | |
} | |
} | |
sheet.appendRow([new Date(), total, indexed, indexed / total]); | |
} | |
} | |
} |
Thanks, also I am working on the batch call option as the script can't make 2k API calls with for loop. If you have any past experience with batch calls, any help would be appreciated.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks, both changes included in the updated version, together with some better sheet read/write code that speeds the script up a lot.