Created
July 26, 2017 18:30
-
-
Save prastut/2fde3d4ab3cf92b0e7c9fed9820589d1 to your computer and use it in GitHub Desktop.
Filter GA requests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// PhantomJS built-in modules used by this script
var page = require('webpage').create();
var system = require('system');
var fs = require('fs');

// JSON file the collected urls are appended to
var file = 'data.json';

// url given on the command line; assigned once the arguments have been checked
var address;

// how long to keep running after the page has opened successfully before we exit
// in ms
var WAIT_TIME = 1000;

// if the page hasn't loaded after this long, something is probably wrong.
// in ms
// NOTE(review): nothing in the visible script reads this value - confirm
// whether a global timeout was meant to be wired up.
var MAX_EXECUTION_TIME = 15000;

// output error messages
var DEBUG = false;

// url of the first requested resource that matched resources_to_log
var url;
// a list of regular expressions of resources (urls) to log when we load them
//
// These are regex literals on purpose: inside a string literal '\.' collapses
// to a plain '.', so patterns built with new RegExp('...\.com') end up with
// unescaped dots that match any character.
var resources_to_log = [
    /^http(s)?:\/\/(www|ssl)\.google-analytics\.com.*/,
    /^http(s)?:\/\/stats\.g\.doubleclick\.net.*/
];
// Checks whether the requested resource is an image.
//
// url: the resource url as a string.
// Returns true when the text after the last '.' of the url - ignoring any
// '#fragment' and '?query' part - is jpg, jpeg or png (case-insensitive).
function isImg(url) {
    var acceptedExts = ['jpg', 'jpeg', 'png'];
    // drop the fragment first: it comes last in a url and may itself contain '?'
    var baseUrl = url.split('#')[0].split('?')[0];
    var ext = baseUrl.split('.').pop().toLowerCase();
    return acceptedExts.indexOf(ext) > -1;
}
// Checks whether a url has a given extension.
//
// url: the resource url as a string.
// ext: the extension to look for, without the leading dot (e.g. 'js').
// Returns true when the text after the last '.' of the url - ignoring any
// '#fragment' and '?query' part - equals ext, compared case-insensitively.
function isExt(url, ext) {
    // drop the fragment first: it comes last in a url and may itself contain '?'
    var baseUrl = url.split('#')[0].split('?')[0];
    var fileExt = baseUrl.split('.').pop().toLowerCase();
    // the file extension is lower-cased above, so normalise the wanted one too
    return String(ext).toLowerCase() === fileExt;
}
// check we have a url, if not exit
if (system.args.length === 1) {
    console.log('Usage: get_ga_resources.js http://www.yoururl.com');
    phantom.exit(1);
} else {
    // address is the url passed
    address = system.args[1];

    // Appends one entry to the `urls` array stored in `file` and writes the
    // file back. A missing file is treated as an empty result set.
    var recordUrl = function(entry) {
        var content = fs.exists(file) ? JSON.parse(fs.read(file)) : { urls: [] };
        content.urls.push(entry);
        fs.write(file, JSON.stringify(content), 'w');
    };

    // create a function that is called every time a resource is requested
    // http://phantomjs.org/api/webpage/handler/on-resource-requested.html
    //
    // request: metadata of the requested resource (only request.url is used)
    // networkRequest: handle for the request itself; abort() cancels it
    page.onResourceRequested = function(request, networkRequest) {
        var i;

        // a match has already been recorded and exit requested
        if (url !== undefined) {
            return;
        }
        // never interfere with the page we were asked to open
        if (address === request.url) {
            return;
        }

        // Look for analytics requests BEFORE aborting static assets: the
        // analytics scripts themselves end in .js and would otherwise be
        // aborted without ever being compared against resources_to_log.
        for (i = 0; i < resources_to_log.length; i++) {
            if (resources_to_log[i].test(request.url)) {
                url = request.url;
                // persist before exiting; no other handler runs after exit
                recordUrl(url);
                phantom.exit();
                return;
            }
        }

        // images, scripts, stylesheets and fonts are not needed: skip them
        if (isImg(request.url) || isExt(request.url, 'js') || isExt(request.url, 'css') || isExt(request.url, 'woff')) {
            networkRequest.abort();
        }
    };

    // if debug is true, log errors, else ignore them
    page.onError = function(msg, trace) {
        if (DEBUG) {
            console.log('ERROR: ' + msg);
            console.log(trace);
        }
    };

    // make a note of any errors so we can print them out
    page.onResourceError = function(resourceError) {
        page.reason = resourceError.errorString;
        page.reason_url = resourceError.url;
    };

    // The load finished without any request matching resources_to_log.
    page.onLoadFinished = function(status) {
        if (url === undefined) {
            // The original pushed the still-undefined `url` here, which
            // JSON.stringify writes as null; keep that placeholder.
            // NOTE(review): confirm consumers of data.json expect a null
            // entry for pages without a matching request.
            recordUrl(null);
        }
        phantom.exit();
    };

    // now all we have to do is open the page, wait WAIT_TIME ms and exit
    page.open(address, function(status) {
        if (status !== 'success') {
            console.log("FAILED: to load " + system.args[1]);
            console.log(page.reason_url);
            console.log(page.reason);
            phantom.exit();
        } else {
            if (address !== page.url) {
                console.log('Redirected: ' + page.url);
            }
            setTimeout(function() {
                phantom.exit();
            }, WAIT_TIME);
        }
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To run
phantomjs ga-get-resource.js <url>