Skip to content

Instantly share code, notes, and snippets.

@abfo
Last active April 25, 2022 08:35
Show Gist options
  • Save abfo/d4cc2f3914eddc35d24467028deec2c5 to your computer and use it in GitHub Desktop.
Save abfo/d4cc2f3914eddc35d24467028deec2c5 to your computer and use it in GitHub Desktop.
How to monitor page index status using the Google Search Console API and Apps Script in a Google Sheet. For instructions on how to configure this script please see https://ithoughthecamewithyou.com/post/monitor-page-index-status-with-google-sheets-apps-script-and-the-google-search-console-api
var AlertEmail = '';
var SitemapUrl = '';
var SearchConsoleProperty = '';
var WriteRetryCount = 3;
function onOpen() {
var ui = SpreadsheetApp.getUi();
ui.createMenu('Search Console')
.addItem('Update Data', 'runUpdate')
.addToUi();
}
function runUpdate() {
var pages = new Map();
Logger.log('Loading known pages...');
readPages(pages);
Logger.log('Adding any new pages from sitemap...');
updatePagesFromSitemap(pages);
Logger.log('Updating page data from search console...');
updatePagesFromGsc(pages);
Logger.log('Writing pages to sheet...');
writePages(pages);
Logger.log('Calculate coverage (optional, need coverage sheet)...');
calculateCoverage(pages);
}
function updatePagesFromGsc(pages){
var changes = [];
var day = new Date().getHours();
for(const[key, value] of pages) {
try {
if (value.checkDay != day) {
continue;
}
var request = {
"inspectionUrl": value.url,
"siteUrl": SearchConsoleProperty
};
var response = UrlFetchApp.fetch('https://searchconsole.googleapis.com/v1/urlInspection/index:inspect', {
headers: {
Authorization: 'Bearer ' + ScriptApp.getOAuthToken()
},
'method' : 'post',
'contentType' : 'application/json',
'payload' : JSON.stringify(request, null, 2)
});
var json = JSON.parse(response.getContentText());
var oldCoverageState = value.coverageState;
var newCoverageState = json.inspectionResult.indexStatusResult.coverageState;
if (oldCoverageState != newCoverageState) {
changes.push(value.url + ' indexStatusResult changed to ' + newCoverageState + ' from ' + oldCoverageState);
}
value.verdict = json.inspectionResult.indexStatusResult.verdict;
value.coverageState = newCoverageState;
value.robotsTxtState = json.inspectionResult.indexStatusResult.robotsTxtState;
value.indexingState = json.inspectionResult.indexStatusResult.indexingState;
value.lastCrawlTime = json.inspectionResult.indexStatusResult.lastCrawlTime;
value.pageFetchState = json.inspectionResult.indexStatusResult.pageFetchState;
value.googleCanonical = json.inspectionResult.indexStatusResult.googleCanonical;
value.userCanonical = json.inspectionResult.indexStatusResult.userCanonical;
} catch (err) {
Logger.log('Error processing url ' + err);
}
}
if (changes.length > 0) {
MailApp.sendEmail(AlertEmail, 'Google Search Console Monitor found changes on ' + new Date(), changes.join('\r\n'));
}
}
function updatePagesFromSitemap(pages) {
var sitemapXml = UrlFetchApp.fetch(SitemapUrl).getContentText();
var sitemapDoc = XmlService.parse(sitemapXml);
var root = sitemapDoc.getRootElement();
var ns = XmlService.getNamespace('http://www.sitemaps.org/schemas/sitemap/0.9');
var entries = root.getChildren('url', ns);
for (var i = 0; i < entries.length; i++) {
var loc = entries[i].getChild('loc', ns).getText();
var lastmod = ' ';
var lastmodElement = entries[i].getChild('lastmod', ns);
if (lastmodElement) {
lastmod = lastmodElement.getText();
}
if(!pages.has(loc)) {
var page = {};
page.url = loc;
page.lastmod = lastmod;
page.verdict = ' ';
page.coverageState = 'Added from sitemap';
page.robotsTxtState = ' ';
page.indexingState = ' ';
page.lastCrawlTime = ' ';
page.pageFetchState = ' ';
page.googleCanonical = ' ';
page.userCanonical = ' ';
page.checkDay = getRandomInt(24);
pages.set(loc, page);
}
}
}
function readPages(pages) {
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('gsc');
var lastRow = sheet.getLastRow();
var range = sheet.getRange(2, 1, lastRow, 11).getValues();
for (var row = 0; row <= (lastRow - 1); row++) {
var page = {};
page.url = range[row][0];
page.lastmod = range[row][1];
page.verdict = range[row][2];
page.coverageState = range[row][3];
page.robotsTxtState = range[row][4];
page.indexingState = range[row][5];
page.lastCrawlTime = range[row][6];
page.pageFetchState = range[row][7];
page.googleCanonical = range[row][8];
page.userCanonical = range[row][9];
page.checkDay = range[row][10];
pages.set(page.url, page);
}
}
function writePages(pages) {
for(var r = 1; r <= WriteRetryCount; r++){
try {
writePagesNoRetry(pages);
break;
} catch (err) {
if (r == WriteRetryCount) {
throw err;
} else {
Utilities.sleep(5000);
}
}
}
}
function writePagesNoRetry(pages) {
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('gsc');
// make sure we have enough space
var rowsNeeded = pages.size + 1;
var rowsAvailable = sheet.getMaxRows();
var rowsToAdd = rowsNeeded - rowsAvailable;
while(rowsToAdd >= 0) {
sheet.appendRow([' ']);
rowsToAdd--;
}
sheet.clear();
sheet.appendRow(['URL',
'Last Modified',
'Verdict',
'Coverage State',
'ROBOTS.TXT State',
'Indexing State',
'Last Crawl Time',
'Page Fetch State',
'Google Canonical',
'User Canonical',
'Check Day']);
sheetValues = [];
for(const[key, value] of pages) {
sheetValues.push([value.url,
value.lastmod,
value.verdict,
value.coverageState,
value.robotsTxtState,
value.indexingState,
value.lastCrawlTime,
value.pageFetchState,
value.googleCanonical,
value.userCanonical,
value.checkDay]);
}
sheet.getRange(2, 1, sheetValues.length, 11).setValues(sheetValues);
}
function getRandomInt(max) {
return Math.floor(Math.random() * max);
}
function calculateCoverage(pages) {
var hour = new Date().getHours();
if (hour == 20) {
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('coverage');
if (sheet) {
var total = 0;
var indexed = 0;
for(const[key, value] of pages) {
total++;
if ((value.coverageState == 'Submitted and indexed')
|| (value.coverageState == 'Indexed, not submitted in sitemap')
|| (value.coverageState == 'Duplicate, submitted URL not selected as canonical')) {
indexed++;
}
}
sheet.appendRow([new Date(), total, indexed, indexed / total]);
}
}
}
@emrecan-s
Copy link

Thanks for this, there is one small issue => Missing ' on var SitemapUrl = ';

@emrecan-s
Copy link

emrecan-s commented Apr 7, 2022

I see that people are struggling with auth and my suggestion would be to deal with the auth via manifest file by adding

{
"timeZone": "America/New_York",
"exceptionLogging": "STACKDRIVER",
"oauthScopes": ["https://www.googleapis.com/auth/script.external_request", "https://www.googleapis.com/auth/webmasters", "https://www.googleapis.com/auth/spreadsheets.currentonly", "https://www.googleapis.com/auth/script.send_mail"],
"runtimeVersion": "V8"
}

to the manifest file and getting the token with a single call var token = ScriptApp.getOAuthToken();

I modified few more things and works fine you can find it here https://gist.github.com/emrecan-s/1aebc0fdb74baa87f598c04314f5acca

@abfo
Copy link
Author

abfo commented Apr 24, 2022

Thanks, both changes included in the updated version, together with some better sheet read/write code that speeds the script up a lot.

@emrecan-s
Copy link

Thanks, also I am working on the batch call option as the script can't make 2k API calls with for loop. If you have any past experience with batch calls, any help would be appreciated.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment