Last active
August 29, 2015 13:57
-
-
Save saranrapjs/9517676 to your computer and use it in GitHub Desktop.
PhantomJS script for deducing included resources using headless webkit, and determining which of them have been successfully cached by Varnish!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Module-level state shared by every function in this script.
var system = require('system'),   // PhantomJS module: command-line arguments
    fs = require("fs"),           // PhantomJS module: used by writeOut()
    // A response counts as cached when a header whose name matches the first
    // pattern carries a value matching the second. HTTP header field names
    // are case-insensitive, so the name is matched regardless of case.
    CACHED_HEADER_NAME_REGEX = /X-Cache/i,
    CACHED_HEADER_VALUE_REGEX = /HIT/,
    address,                      // URL given on the command line
    parsedAddress,                // parseUri(address)
    cachedBytes = 0,              // running total of bodySize for cached responses
    nocacheBytes = 0,             // running total of bodySize for uncached responses
    showAllURLS = false,          // set by the "-a" flag
    quiet = false,                // set by the "-q" flag
    matchAddress = '';            // '//' + host of the main URL, used by urlFilter()
// Entry point.
//   <script> URL [-q] [-a]  : load the URL once in a child process, then load
//                             it again in this process and report on caching.
//   <script> URL prerun     : (internal) load the URL once and exit.
if (system.args.length === 1) {
    writeOut('Usage: netlog.js <some URL>');
    phantom.exit(1);
} else {
    address = system.args[1];
    parsedAddress = parseUri(address);
    matchAddress = '//' + parsedAddress.host;
    system.args.forEach(function (arg) {
        if (arg === "-q") quiet = true;
        if (arg === "-a") showAllURLS = true;
    });
    if (system.args[2] === "prerun") {
        // crazy workaround for this issue: https://github.com/ariya/phantomjs/issues/10357
        var firstRun = require('webpage').create();
        firstRun.open(address, function (status) {
            if (status !== 'success') {
                writeOut('First run FAILED');
            }
            phantom.exit();
        });
    } else {
        writeOut('Loading page, 1st attempt, to trigger cache if necessary...');
        // Re-run this same script (system.args[0]) in "prerun" mode rather
        // than assuming it is saved under a particular file name.
        var spawn = require('child_process').spawn,
            child = spawn("phantomjs", ["--ignore-ssl-errors=true", system.args[0], address, 'prerun']);
        // probably shouldn't presume "ignore-ssl-errors", but seemed cheapest way for now
        child.stdout.on("data", function (data) {
            // The second argument of writeOut() is a destination path, so the
            // child's output has to be concatenated into the message itself.
            writeOut("STDOUT: " + JSON.stringify(data));
        });
        child.stderr.on("data", function (data) {
            writeOut("STDERR: " + JSON.stringify(data));
        });
        child.on("exit", function () {
            writeOut("First attempt complete. Loading page second attempt...");
            run(address);
        });
    }
}
/**
 * Loads `url` in a fresh PhantomJS page, prints one line per tracked resource
 * to /dev/stdout (cached Y/N, size in KB, URL) and, once the page has
 * finished loading, prints an overall "result:" summary and exits PhantomJS.
 *
 * @param {string} url - address of the page to load
 */
function run(url) {
    var page = require('webpage').create();

    page.onResourceReceived = function (res) {
        // Notifications whose stage is 'end' are skipped; everything else is reported.
        if (res.stage === 'end') return;
        shouldTrackThisURL(res, function (tracked) {
            var didCache = wasCached(tracked.headers),
                // A missing bodySize is counted as 0 so that it cannot turn
                // the running totals into NaN.
                size = tracked.bodySize || 0,
                didCacheString = (didCache === true) ? 'Y' : 'N',
                color = (didCache === true) ? 'green' : 'red',
                report = didCacheString + '\t' + (size / 1024).toFixed(2) + 'KB\t' + tracked.url;
            writeOut(stylize(report, color), "/dev/stdout");
            if (didCache === true) {
                cachedBytes += size;
            } else {
                nocacheBytes += size;
            }
        });
    };

    page.open(url, function (status) {
        if (status !== 'success') {
            writeOut('FAILED to load the address');
        }
        var cachedKB = cachedBytes / 1024,
            nocachedKB = nocacheBytes / 1024,
            totalKB = cachedKB + nocachedKB,
            // Report 0% instead of NaN% when no bytes were tracked at all.
            cachedPercent = totalKB > 0 ? Math.round((cachedKB / totalKB) * 100) : 0;
        writeOut("result: " + cachedPercent + "% cached (" + cachedKB.toFixed(2) + "KB/" + totalKB.toFixed(2) + "KB)");
        phantom.exit();
    });
}
/**
 * Invokes `callback(obj)` only when `obj.url` passes urlFilter().
 *
 * @param {{url: string}} obj - object carrying the URL to test
 * @param {function(Object)} callback - called with `obj` when the URL is tracked
 */
function shouldTrackThisURL(obj, callback) {
    if (urlFilter(obj.url) === true) {
        callback(obj);
    }
}
/**
 * Decides whether a resource URL should appear in the report.
 *
 * @param {string} url - resource URL
 * @returns {boolean} false for data: URLs and, unless showAllURLS is set, for
 *     URLs that do not contain matchAddress; true otherwise
 */
function urlFilter(url) {
    // we don't like data urls -- test the scheme only, so that an ordinary
    // URL that merely contains "data:" (e.g. in its query string) is kept
    if (/^data:/i.test(url)) return false;
    // we like our own urls
    if (showAllURLS === false && url.indexOf(matchAddress) === -1) return false;
    return true;
}
/**
 * Reports whether a response was served from cache, judged by its headers.
 *
 * @param {Array<{name: string, value: string}>} headers - response headers
 * @returns {boolean} true when some header's name matches
 *     CACHED_HEADER_NAME_REGEX and its value matches CACHED_HEADER_VALUE_REGEX;
 *     false otherwise, including when `headers` is missing or empty
 */
function wasCached(headers) {
    if (!headers) return false;
    // Index loop rather than for...in: for...in would also visit enumerable
    // properties inherited from Array.prototype.
    for (var i = 0; i < headers.length; i++) {
        var header = headers[i];
        if (header &&
                CACHED_HEADER_NAME_REGEX.test(header.name) &&
                CACHED_HEADER_VALUE_REGEX.test(header.value)) {
            return true;
        }
    }
    return false;
}
// from: http://planzero.org/blog/2013/03/07/spidering_the_web_with_casperjs
/**
 * Splits a URL into its components using a single regular expression.
 *
 * @param {*} url - value to parse; it is converted to a string and trimmed
 * @returns {?Object} object with string fields href, protocol, authority,
 *     host (hostname plus optional ":port"), hostname, port, pathname, search
 *     and hash; a component that is absent is returned as ''. null is
 *     returned only if the pattern does not match.
 */
function parseUri(url) {
    var match = String(url).trim().match(/^([^:\/?#]+:)?(\/\/(?:[^:@]*(?::[^:@]*)?@)?(([^:\/?#]*)(?::(\d*))?))?([^?#]*)(\?[^#]*)?(#[\s\S]*)?/);
    if (!match) return null;
    return {
        href: match[0] || '',
        protocol: match[1] || '',
        authority: match[2] || '',
        host: match[3] || '',
        hostname: match[4] || '',
        port: match[5] || '',
        pathname: match[6] || '',
        search: match[7] || '',
        hash: match[8] || ''
    };
}
/**
 * Writes `str` followed by a newline to `destination` via fs.write.
 *
 * @param {string} str - text to write
 * @param {string} [destination="/dev/stderr"] - path to write to
 * @returns {(boolean|undefined)} false when the write was suppressed because
 *     quiet mode is on and the destination is /dev/stderr; otherwise undefined
 */
function writeOut(str, destination) {
    var target = destination || "/dev/stderr";
    // quiet option: silences /dev/stderr only; other destinations still print
    if (quiet === true && target === "/dev/stderr") return false;
    fs.write(target, str + '\n', "w");
}
/**
 * Wraps `str` in the ANSI escape sequences that switch a terminal style on
 * and off again.
 *
 * @param {string} str - text to decorate
 * @param {string} style - one of the keys of the `styles` table below
 * @returns {string} the decorated text, or the text unchanged when `style`
 *     is not a known key
 */
function stylize(str, style) {
    // from http://news.qooxdoo.org/running-unit-tests-from-the-command-line
    // Each entry is [code that enables the style, code that resets it].
    var styles = {
        //styles
        'bold'      : [1,  22],
        'italic'    : [3,  23],
        'underline' : [4,  24],
        'inverse'   : [7,  27],
        //grayscale
        'white'     : [37, 39],
        'grey'      : [90, 39],
        'black'     : [30, 39],
        //colors
        'blue'      : [34, 39],
        'cyan'      : [36, 39],
        'green'     : [32, 39],
        'magenta'   : [35, 39],
        'red'       : [31, 39],
        'yellow'    : [33, 39]
    };
    // Own-property check so names such as "toString" are not treated as styles.
    if (!Object.prototype.hasOwnProperty.call(styles, style)) {
        return String(str);
    }
    var codes = styles[style];
    // '\x1b' is the ESC character; the octal form '\033' is rejected in strict mode.
    return '\x1b[' + codes[0] + 'm' + str +
           '\x1b[' + codes[1] + 'm';
}
This one-liner prints just the URLs that were loaded:
phantomjs --ignore-ssl-errors=true cache.js URL_TO_BE_TESTED -q | awk '{print $3}'
This one-liner shows all URLs (including URLs whose hostname does not match the main URL):
phantomjs --ignore-ssl-errors=true cache.js URL_TO_BE_TESTED -a
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For local SSL testing, run with SSL errors disabled: