Skip to content

Instantly share code, notes, and snippets.

@duncanmorris
Last active September 12, 2017 17:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save duncanmorris/5104509eddbdc5a2de11 to your computer and use it in GitHub Desktop.
Save duncanmorris/5104509eddbdc5a2de11 to your computer and use it in GitHub Desktop.
Using PhantomJS to monitor Google Analytics - full code
// PhantomJS handles used throughout the script
var page = require('webpage').create();
var system = require('system');
// target URL; assigned from the command-line arguments once they are checked
var address;
// how long to keep running (and logging requests) after the page has
// loaded successfully before we exit, in ms
var WAIT_TIME = 5000;
// hard upper bound on the whole run, in ms; if we are still running after
// this long, something is probably wrong.
var MAX_EXECUTION_TIME = 15000;
// when true, errors reported to page.onError are printed; otherwise ignored
var DEBUG = false;
// a list of regular expressions of resources (urls) to log when we load them.
// Written as regex literals on purpose: inside a quoted string '\.' collapses
// to '.', which would match ANY character instead of a literal dot.
var resources_to_log = [
    /^http(s)?:\/\/(www|ssl)\.google-analytics\.com.*/,
    /^http(s)?:\/\/stats\.g\.doubleclick\.net.*/
];
// check we have a url, if not print usage and exit with a failure status
if (system.args.length === 1) {
    console.log('Usage: get_ga_resources.js http://www.yoururl.com');
    phantom.exit(1);
} else {
    // address is the url passed on the command line
    address = system.args[1];

    // Called every time the page requests a resource; prints the URL when it
    // matches one of the patterns in resources_to_log.
    // http://phantomjs.org/api/webpage/handler/on-resource-requested.html
    page.onResourceRequested = function (res) {
        var i;
        for (i = 0; i < resources_to_log.length; i++) {
            if (resources_to_log[i].test(res.url)) {
                console.log(res.url);
                // one line per request is enough, even if several patterns match
                break;
            }
        }
    };

    // if DEBUG is true, log errors reported by the page, else ignore them
    page.onError = function (msg, trace) {
        if (!DEBUG) {
            return;
        }
        console.log('ERROR: ' + msg);
        // The trace is an array of frame objects, so logging it directly is
        // unreadable. Field names (file / line / function) follow the
        // PhantomJS onError documentation.
        if (trace && trace.length) {
            trace.forEach(function (frame) {
                console.log('  -> ' + (frame.file || frame.sourceURL) + ': ' + frame.line +
                    (frame.function ? ' (in function "' + frame.function + '")' : ''));
            });
        }
    };

    // remember the most recent resource error so it can be printed if the
    // page itself fails to load
    page.onResourceError = function (resourceError) {
        page.reason = resourceError.errorString;
        page.reason_url = resourceError.url;
    };

    // Watchdog: if we are still running after MAX_EXECUTION_TIME ms, give up.
    // Armed before page.open so it is in place however the load turns out.
    setTimeout(function () {
        // MAX_EXECUTION_TIME is in ms; report it in seconds as the message says
        console.log('FAILED: Max execution time ' + Math.round(MAX_EXECUTION_TIME / 1000) + ' seconds exceeded');
        phantom.exit(1);
    }, MAX_EXECUTION_TIME);

    // now all we have to do is open the page, wait WAIT_TIME ms and exit
    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('FAILED: to load ' + address);
            console.log(page.reason_url);
            console.log(page.reason);
            // non-zero status, consistent with the other failure paths
            phantom.exit(1);
            return;
        }
        if (address !== page.url) {
            console.log('Redirected: ' + page.url);
        }
        // give the page WAIT_TIME ms after load to fire its tracking requests
        setTimeout(function () {
            phantom.exit();
        }, WAIT_TIME);
    });
}
@duncanmorris
Copy link
Author

Usage: phantomjs get_ga_resources.js http://www.yoururl.com

NB - If you are requesting an https site, you may need to run the code with an additional parameter:

phantomjs --ssl-protocol=any get_ga_resources.js http://www.yoururl.com

There are additional parameters you could pass (see http://phantomjs.org/api/command-line.html), one of which gives the ability to ignore SSL errors.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment