Skip to content

Instantly share code, notes, and snippets.

@prastut
Created July 26, 2017 18:30
Show Gist options
  • Save prastut/2fde3d4ab3cf92b0e7c9fed9820589d1 to your computer and use it in GitHub Desktop.
Save prastut/2fde3d4ab3cf92b0e7c9fed9820589d1 to your computer and use it in GitHub Desktop.
Filter GA requests
// PhantomJS modules used by this script
var page = require('webpage').create();
var system = require('system');
var fs = require('fs');
// JSON file that the collected urls are appended to
var file = 'data.json';
// target url; filled in from the command-line arguments further down
var address;
// delay, in ms, between a successful page load and exiting
var WAIT_TIME = 1000;
// a load that takes longer than this (in ms) probably means something is wrong;
// NOTE(review): not referenced anywhere else in the visible script
var MAX_EXECUTION_TIME = 15000;
// when true, page errors are printed to the console
var DEBUG = false;
// most recent request url that matched one of the logged resource patterns
var url;
// a list of regular expressions of resources (urls) to log when we load them.
// Written as regex literals on purpose: inside a quoted string '\.' collapses
// to '.', which would make every dot in the host name match ANY character.
var resources_to_log = [
    /^https?:\/\/(?:www|ssl)\.google-analytics\.com/,
    /^https?:\/\/stats\.g\.doubleclick\.net/
];
// Tells whether a requested url points at an image, judged only by the
// extension (jpg, jpeg or png, case-insensitive) of the part before any '?'.
function isImg(url) {
    var queryStart = url.indexOf('?');
    var path = queryStart === -1 ? url : url.slice(0, queryStart);
    // everything after the last '.', or the whole path when there is no dot
    var suffix = path.slice(path.lastIndexOf('.') + 1).toLowerCase();
    return ['jpg', 'jpeg', 'png'].indexOf(suffix) !== -1;
}
// Tells whether an url has the given file extension.
//   url - the requested url; anything from the first '?' onwards is ignored
//   ext - extension to look for, without the leading dot (e.g. 'js')
// Returns true when the text after the last '.' of the url equals ext.
// The comparison is case-insensitive on both sides, so 'JS' and 'js' are
// interchangeable; a null/undefined ext never matches.
function isExt(url, ext) {
    if (ext === null || ext === undefined) {
        return false;
    }
    var baseUrl = url.split('?')[0];
    var fileExt = baseUrl.split('.').pop().toLowerCase();
    // normalise the wanted extension the same way as the one taken from the url
    return String(ext).toLowerCase() === fileExt;
}
// check we have a url, if not exit
if (system.args.length === 1) {
    console.log('Usage: get_ga_resources.js http://www.yoururl.com');
    phantom.exit(1);
} else {
    // address is the url passed
    address = system.args[1];

    // create a function that is called every time a resource is requested
    // http://phantomjs.org/api/webpage/handler/on-resource-requested.html
    // The second argument is the object that lets us cancel the request; it
    // has to be declared here, otherwise networkRequest.abort() below throws
    // a ReferenceError.
    page.onResourceRequested = function(request, networkRequest) {
        if (address === request.url) {
            // the page itself: always let it through
            return;
        }
        if (isImg(request.url) || isExt(request.url, 'js') || isExt(request.url, 'css') || isExt(request.url, 'woff')) {
            // images, scripts, stylesheets and fonts are not needed: skip them
            networkRequest.abort();
            return;
        }
        var length = resources_to_log.length;
        while (length--) {
            if (resources_to_log[length].test(request.url)) {
                // remember the matching url and stop
                // NOTE(review): the url is only written to the data file in
                // onLoadFinished; exiting here may drop it - confirm intent
                url = request.url;
                phantom.exit();
            }
        }
    };

    // if debug is true, log errors, else ignore them
    page.onError = function(msg, trace) {
        if (DEBUG) {
            console.log('ERROR: ' + msg);
            console.log(trace);
        }
    };

    // make a note of any errors so we can print them out
    page.onResourceError = function(resourceError) {
        page.reason = resourceError.errorString;
        page.reason_url = resourceError.url;
    };

    // once the page has loaded, append the recorded url to the JSON data file
    // and exit. The file must already exist and contain an object with a
    // "urls" array, since it is read and parsed before being rewritten.
    // NOTE(review): url is still undefined when no request matched, in which
    // case JSON.stringify stores it as null - confirm this is wanted
    page.onLoadFinished = function(status) {
        var content = JSON.parse(fs.read(file));
        content.urls.push(url);
        fs.write(file, JSON.stringify(content), 'w');
        phantom.exit();
    };

    // now all we have to do is open the page, wait WAIT_TIME ms and exit
    page.open(address, function(status) {
        if (status !== 'success') {
            // report the last resource error noted by onResourceError
            console.log("FAILED: to load " + system.args[1]);
            console.log(page.reason_url);
            console.log(page.reason);
            phantom.exit();
        } else {
            if (address !== page.url) {
                console.log('Redirected: ' + page.url);
            }
            setTimeout(function() {
                phantom.exit();
            }, WAIT_TIME);
        }
    });
}
@prastut
Copy link
Author

prastut commented Jul 26, 2017

To run: `phantomjs ga-get-resource.js <url>`

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment