Skip to content

Instantly share code, notes, and snippets.

@saranrapjs
Last active August 29, 2015 13:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save saranrapjs/9517676 to your computer and use it in GitHub Desktop.
Save saranrapjs/9517676 to your computer and use it in GitHub Desktop.
PhantomJS script that uses headless WebKit to discover the resources a page includes and to determine which of them were successfully served from the Varnish cache.
// PhantomJS built-in modules: `system` exposes the command-line arguments,
// `fs` is used by writeOut() to write to /dev/stdout and /dev/stderr.
var system = require('system');
var fs = require("fs");

// A response counts as "cached" when it carries a header whose name matches
// CACHED_HEADER_NAME_REGEX and whose value matches CACHED_HEADER_VALUE_REGEX.
// HTTP header field names are case-insensitive, so the name is matched with
// the `i` flag (e.g. `x-cache` as well as `X-Cache`).
var CACHED_HEADER_NAME_REGEX = /X-Cache/i;
var CACHED_HEADER_VALUE_REGEX = /HIT/;

// URL under test (first CLI argument) and its parsed form from parseUri().
var address;
var parsedAddress;

// Running byte totals, accumulated by run() for cached / non-cached responses.
var cachedBytes = 0;
var nocacheBytes = 0;

// CLI flags: `-a` reports every URL instead of only those containing
// matchAddress; `-q` suppresses everything writeOut() would send to stderr.
var showAllURLS = false;
var quiet = false;

// '//' + host of the URL under test; urlFilter() uses it to recognise
// same-host resources.
var matchAddress = '';
// Script entry point.
//
//   <script> <URL> [-q] [-a]   normal mode: spawn a "prerun" child that loads
//                              the page once, then load it again via run()
//                              and report per-resource cache status.
//   <script> <URL> prerun      child mode: load the page once and exit.
if (system.args.length === 1) {
    // system.args[0] is the script name, so the usage text always matches
    // however the file happens to be named.
    writeOut('Usage: ' + system.args[0] + ' <some URL> [-q] [-a]');
    phantom.exit(1);
} else {
    address = system.args[1];
    parsedAddress = parseUri(address);
    matchAddress = '//' + parsedAddress.host;
    system.args.forEach(function (arg) {
        if (arg === "-q") quiet = true;
        if (arg === "-a") showAllURLS = true;
    });
    if (system.args[2] === "prerun") {
        // crazy workaround for this issue: https://github.com/ariya/phantomjs/issues/10357
        var firstRun = require('webpage').create();
        firstRun.open(address, function (status) {
            if (status !== 'success') {
                writeOut('First run FAILED');
            }
            phantom.exit();
        });
    } else {
        writeOut('Loading page, 1st attempt, to trigger cache if necessary...');
        // Re-invoke this same script (system.args[0]) rather than a
        // hard-coded file name, so the prerun works under any file name.
        // probably shouldn't presume "ignore-ssl-errors", but seemed cheapest way for now
        var spawn = require('child_process').spawn,
            child = spawn("phantomjs", [ "--ignore-ssl-errors=true", system.args[0], address, 'prerun' ]);
        // writeOut's second parameter is the destination path, so the child
        // output must be concatenated into the message itself.
        child.stdout.on("data", function (data) {
            writeOut("STDOUT: " + JSON.stringify(data));
        });
        child.stderr.on("data", function (data) {
            writeOut("STDERR: " + JSON.stringify(data));
        });
        child.on("exit", function (code) {
            writeOut("First attempt complete. Loading page second attempt...");
            run(address);
        });
    }
}
// Loads `url` in a fresh PhantomJS page, prints one report line per tracked
// resource to /dev/stdout ("Y|N <tab> size KB <tab> url", green when cached,
// red otherwise), then writes a "result: N% cached" summary via writeOut()
// and exits PhantomJS.
//
// url - address to open.
//
// Side effects: adds to the module-level cachedBytes / nocacheBytes counters.
function run(url) {
    var page = require('webpage').create();

    page.onResourceReceived = function (res) {
        // Notifications with stage 'end' are skipped so that a resource is
        // not tallied again when its transfer completes.
        if (res.stage === 'end') return;
        shouldTrackThisURL(res, function (tracked) {
            var didCache = wasCached(tracked.headers),
                // Guard in case bodySize is absent or non-numeric: a single
                // NaN would otherwise poison both running totals.
                size = Number(tracked.bodySize) || 0,
                didCacheString = (didCache === true) ? 'Y' : 'N',
                color = (didCache === true) ? 'green' : 'red',
                report = didCacheString + '\t' + (size / 1024).toFixed(2) + 'KB\t' + tracked.url;
            writeOut(stylize(report, color), "/dev/stdout");
            if (didCache === true) {
                cachedBytes += size;
            } else {
                nocacheBytes += size;
            }
        });
    };

    page.open(url, function (status) {
        if (status !== 'success') {
            writeOut('FAILED to load the address');
        }
        var cachedKB = (cachedBytes / 1024),
            nocachedKB = (nocacheBytes / 1024),
            totalKB = (cachedKB + nocachedKB),
            // Nothing tracked means 0/0; report 0% instead of "NaN%".
            cachedPercent = (totalKB > 0) ? Math.round((cachedKB / totalKB) * 100) : 0;
        writeOut("result: " + cachedPercent + "% cached (" + cachedKB.toFixed(2) + "KB/" + totalKB.toFixed(2) + "KB)");
        phantom.exit();
    });
}
// Invokes `callback(obj)` only when urlFilter() returns exactly `true` for
// `obj.url`; otherwise does nothing.
//
// obj      - any object with a `url` property (a resource response here).
// callback - function that receives `obj` when its URL should be tracked.
function shouldTrackThisURL(obj, callback) {
    var accepted = urlFilter(obj.url);
    if (accepted !== true) {
        return;
    }
    callback(obj);
}
// Decides whether a resource URL should appear in the report.
//
// url - the resource URL.
//
// Returns false for data: URLs and non-string input. Unless showAllURLS is
// set, also returns false for URLs that do not contain matchAddress
// ('//' + host of the page under test). Returns true otherwise.
function urlFilter(url) {
    if (typeof url !== 'string') return false;
    // we don't like data urls -- identified by their scheme prefix, so an
    // ordinary URL that merely contains "data:" (e.g. ".../metadata:v2/")
    // is not rejected by mistake
    if (url.slice(0, 5).toLowerCase() === 'data:') return false;
    // we like our own urls
    if (showAllURLS === false && url.indexOf(matchAddress) === -1) return false;
    return true;
}
// Reports whether a response's headers mark it as a cache hit.
//
// headers - array of { name, value } header objects.
//
// Returns true when any header's name matches CACHED_HEADER_NAME_REGEX and
// that same header's value matches CACHED_HEADER_VALUE_REGEX; false
// otherwise, including when `headers` is missing or empty.
function wasCached(headers) {
    if (!headers || typeof headers.length !== 'number') return false;
    // Index loop rather than for...in, which would also visit any enumerable
    // property added to Array.prototype.
    for (var i = 0; i < headers.length; i++) {
        var header = headers[i];
        // Skip malformed entries instead of throwing on `.match` of undefined.
        if (!header || typeof header.name !== 'string' || typeof header.value !== 'string') continue;
        if (header.name.match(CACHED_HEADER_NAME_REGEX) && header.value.match(CACHED_HEADER_VALUE_REGEX)) {
            return true;
        }
    }
    return false;
}
// from: http://planzero.org/blog/2013/03/07/spidering_the_web_with_casperjs
// Splits a URL into its components using a single regular expression.
//
// url - value to parse; it is coerced to a string and stripped of leading
//       and trailing whitespace first.
//
// Returns an object with the string fields href, protocol, authority, host,
// hostname, port, pathname, search and hash (each '' when that part is
// absent), or null if the pattern does not match.
function parseUri(url) {
    var URI_PATTERN = /^([^:\/?#]+:)?(\/\/(?:[^:@]*(?::[^:@]*)?@)?(([^:\/?#]*)(?::(\d*))?))?([^?#]*)(\?[^#]*)?(#[\s\S]*)?/;
    // Field names in capture-group order; index 0 is the whole match.
    var FIELD_NAMES = ['href', 'protocol', 'authority', 'host', 'hostname', 'port', 'pathname', 'search', 'hash'];

    var trimmed = String(url).replace(/^\s+|\s+$/g, '');
    var groups = trimmed.match(URI_PATTERN);
    if (!groups) {
        return null;
    }

    var parsed = {};
    for (var idx = 0; idx < FIELD_NAMES.length; idx++) {
        parsed[FIELD_NAMES[idx]] = groups[idx] || '';
    }
    return parsed;
}
// Writes `str` followed by a newline to `destination` using fs.write.
//
// str         - text to write.
// destination - path to write to; defaults to "/dev/stderr" when falsy.
//
// Returns false without writing when `quiet` is true and the target is
// "/dev/stderr" (the quiet option); otherwise returns undefined.
function writeOut(str, destination) {
    var target = destination ? destination : "/dev/stderr";
    var suppressed = (quiet === true) && (target === "/dev/stderr");
    if (suppressed) {
        return false;
    }
    fs.write(target, str + '\n', "w");
}
// Wraps `str` in the ANSI escape sequences for the named style.
//
// str   - text to decorate.
// style - one of the keys of `styles` below (e.g. 'green', 'red', 'bold').
//
// Returns the decorated string: ESC[<on>m + str + ESC[<off>m. An unknown
// style name returns the text unchanged (as a string) instead of throwing.
function stylize(str, style) {
    // from http://news.qooxdoo.org/running-unit-tests-from-the-command-line
    var styles = {
        //styles
        'bold'      : [1, 22],
        'italic'    : [3, 23],
        'underline' : [4, 24],
        'inverse'   : [7, 27],
        //grayscale
        'white'     : [37, 39],
        'grey'      : [90, 39],
        // 30 is the black foreground code; 90 is what 'grey' uses
        'black'     : [30, 39],
        //colors
        'blue'      : [34, 39],
        'cyan'      : [36, 39],
        'green'     : [32, 39],
        'magenta'   : [35, 39],
        'red'       : [31, 39],
        'yellow'    : [33, 39]
    };
    // Own-property check so names such as 'toString' are not treated as styles.
    if (!Object.prototype.hasOwnProperty.call(styles, style)) {
        return String(str);
    }
    var codes = styles[style];
    // '\x1b' is the ESC character; the octal form '\033' is a syntax error
    // in strict-mode code.
    return '\x1b[' + codes[0] + 'm' + str +
           '\x1b[' + codes[1] + 'm';
}
@saranrapjs
Copy link
Author

For local SSL testing, run with SSL errors disabled:

phantomjs --ignore-ssl-errors=true cache.js URL_TO_BE_TESTED

@saranrapjs
Copy link
Author

This one-liner prints just the URLs that were loaded:

phantomjs --ignore-ssl-errors=true cache.js URL_TO_BE_TESTED -q | awk '{print $3}'

@saranrapjs
Copy link
Author

This one-liner will show all URLs (including URLs whose hostname does not match the main URL):

phantomjs --ignore-ssl-errors=true cache.js URL_TO_BE_TESTED -a

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment