Skip to content

Instantly share code, notes, and snippets.

@martinschierle
Last active August 3, 2020 12:39
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martinschierle/17384e78c3449b01174443d294a43371 to your computer and use it in GitHub Desktop.
Save martinschierle/17384e78c3449b01174443d294a43371 to your computer and use it in GitHub Desktop.
Small Puppeteer script that crawls random URLs on a domain, captures screenshots with the LCP and CLS elements highlighted, and writes out heatmaps of the LCP and CLS bounding boxes across all crawled pages.
const puppeteer = require('puppeteer');
const { createCanvas, loadImage } = require('canvas')
const mustache = require('mustache')
var fs = require('fs');
const fsExtra = require('fs-extra')
// Default cap on the number of pages processed per run; the optional second
// command-line argument overrides it.
let MAX_URLS = 50;
// Mustache template for the HTML report. The relative path is resolved against
// the current working directory, so the script must be started from the folder
// that contains template.html.
let TEMPLATE = fs.readFileSync('template.html', 'utf8');
// using googlebot user agent might get rid of some cookie alerts
// (W.X.Y.Z is a placeholder for a real Chrome version number)
//const agent = "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
// Network throttling profile in the shape expected by the CDP command
// Network.emulateNetworkConditions. It is only used if that (currently
// commented-out) call is enabled.
// NOTE(review): throughput values are presumably bytes/second and latency
// milliseconds — confirm against the DevTools Protocol documentation.
const Good3G = {
'offline': false,
'downloadThroughput': 1.5 * 1024 * 1024 / 8,
'uploadThroughput': 750 * 1024 / 8,
'latency': 40
};
// Device descriptor used both for page emulation and for sizing the heatmap
// canvases (via phone.viewport).
const phone = puppeteer.devices['Nexus 5X'];
/**
 * Randomly reorders an array in place using the Fisher–Yates algorithm.
 *
 * @param {Array} a - Array to shuffle; it is mutated.
 * @returns {Array} The same array instance, now shuffled.
 */
function shuffle(a) {
  // `remaining` is the size of the not-yet-fixed prefix; each round moves one
  // randomly picked element from that prefix into its final slot at the end.
  let remaining = a.length;
  while (remaining > 1) {
    const pick = Math.floor(Math.random() * remaining);
    remaining -= 1;
    [a[remaining], a[pick]] = [a[pick], a[remaining]];
  }
  return a;
}
/**
 * Runs inside the crawled page (it is handed to `page.evaluate`), so it may
 * only use browser globals.
 *
 * It injects CSS that outlines elements carrying the `lcp_elem` (red) and
 * `cls_elem` (green) classes, then registers PerformanceObservers that:
 *  - track the largest `largest-contentful-paint` entry in `window.lcp`
 *    (`value`, `size`, `tagName`, `classes`, `path`) and tag its element with
 *    `lcp_elem`;
 *  - sum `layout-shift` entry values into `window.cls.value` and tag every
 *    shifted source element with `cls_elem`.
 *
 * `window.lcp_elem` and `window.cls_elems` hold the tagged elements.
 */
function injectJs() {
  // inject some css to highlight the offending elements
  const style = document.createElement('style');
  style.textContent = '.cls_elem {border: 5px solid green; box-sizing: border-box;} .lcp_elem{border: 5px solid red; box-sizing: border-box;}';
  (document.head || document.documentElement).appendChild(style);

  window.lcp = {value: -1, size: -1};
  window.cls = {value: 0};
  window.lcp_elem = null;
  window.cls_elems = [];

  // Maps a reported node to something we can put a class on: the node itself
  // when it is an HTMLElement, otherwise its parent element. Returns null when
  // the node is missing (e.g. removed from the DOM) or has no parent element.
  const toElement = function (node) {
    if (!node) return null;
    if (node instanceof HTMLElement) return node;
    return node.parentElement || null;
  };

  // Builds a "tag#id.class > ..." description of the ancestors of `el`, from
  // the <html> element down to the direct parent (`el` itself is not included).
  const cssPath = function (el) {
    const path = [];
    let node = el;
    while (node.nodeName.toLowerCase() !== 'html') {
      node = node.parentNode;
      if (!node) break;
      // Read the attribute instead of `className`: on SVG elements `className`
      // is an object, not a string.
      const classAttr = typeof node.getAttribute === 'function'
        ? (node.getAttribute('class') || '').trim()
        : '';
      path.unshift(
        node.nodeName.toLowerCase() +
        (node.id ? '#' + node.id : '') +
        (classAttr ? '.' + classAttr.split(/\s+/).join('.') : '')
      );
    }
    return path.join(' > ');
  };

  // Keep the largest LCP candidate seen so far.
  const po = new PerformanceObserver((entryList) => {
    entryList.getEntries().forEach(function (entry) {
      console.log(entry);
      if (!(entry.size > window.lcp.size)) return;
      const e = toElement(entry.element);
      // Remove the old highlight BEFORE adding the new one, so that an update
      // for the same element does not end up stripping its class.
      if (window.lcp_elem) window.lcp_elem.classList.remove('lcp_elem');
      if (e) e.classList.add('lcp_elem');
      window.lcp.size = entry.size;
      window.lcp.value = entry.startTime;
      window.lcp_elem = e;
      // The timing is still valid when the element is gone; only the element
      // details are unavailable in that case.
      window.lcp.tagName = e ? e.tagName : null;
      window.lcp.classes = e ? e.getAttribute('class') : null;
      window.lcp.path = e ? cssPath(e) : null;
    });
  });
  // Observe entries of type `largest-contentful-paint`, including buffered entries,
  // i.e. entries that occurred before calling `observe()` below.
  po.observe({
    type: 'largest-contentful-paint',
    buffered: true,
  });

  try {
    const cls_po = new PerformanceObserver((list) => {
      for (const entry of list.getEntries()) {
        console.log(entry);
        // Shifts that directly follow user input do not count towards CLS.
        if (entry.hadRecentInput) continue;
        window.cls.value += entry.value;
        const sources = entry.sources || [];
        for (const source of sources) {
          const e = toElement(source.node);
          if (!e) continue;
          e.classList.add('cls_elem');
          if (!window.cls_elems.includes(e)) window.cls_elems.push(e);
        }
      }
    });
    cls_po.observe({type: 'layout-shift', buffered: true});
  } catch (e) {
    console.log(e.message);
    // Do nothing if the browser doesn't support this API.
  }
}
/** Resolves after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Waits for `promise`, but gives up after `ms` milliseconds.
 * Resolves to the promise's value, or to `undefined` when the timer wins.
 * A rejection of `promise` before the timeout is propagated.
 */
async function withTimeout(promise, ms) {
  let timer;
  const timeout = new Promise((resolve) => { timer = setTimeout(resolve, ms); });
  try {
    return await Promise.race([promise, timeout]);
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Turns an anchor href into a crawlable URL on the target site.
 * Returns null for non-http(s) links (mailto:, javascript:, ...), for links
 * whose host is neither `hostname` nor one of its subdomains, and for
 * unparsable values. The fragment is dropped so "#section" variants of one
 * page are not crawled separately.
 */
function normalizeCrawlLink(href, hostname) {
  let parsed;
  try {
    parsed = new URL(href);
  } catch (e) {
    return null;
  }
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return null;
  if (parsed.hostname !== hostname && !parsed.hostname.endsWith('.' + hostname)) return null;
  parsed.hash = '';
  return parsed.href;
}

/**
 * Crawls up to `max_urls` randomly chosen pages starting at `domain`, records
 * LCP/CLS data and a screenshot for each, and writes the report to ./output:
 * `index.html`, `lcp_heatmap.png`, `cls_heatmap.png` and `images/*.jpeg`.
 * The output directory is emptied first.
 *
 * @param {string} domain - Absolute start URL (including scheme).
 * @param {number} max_urls - Maximum number of pages to process.
 * @returns {Promise<void>} Resolves once the report has been written.
 * @throws {TypeError} If `domain` is not a valid absolute URL.
 */
async function doBatch(domain, max_urls) {
  // Validate the start URL before touching the output directory.
  const hostname = new URL(domain).hostname;
  const startUrl = normalizeCrawlLink(domain, hostname) || domain;

  // reset output file and images dir
  fsExtra.emptyDirSync("output");
  fs.mkdirSync("output/images");

  // Crawl frontier, plus every URL ever queued so nothing is visited twice.
  const queue = [startUrl];
  const seen = new Set(queue);
  let processedCount = 0;

  //json results object to write towards mustache at the end
  const results = {domain: domain, items: []};

  //initialize two canvases for drawing output heatmaps
  const cls_canvas = createCanvas(phone.viewport.width, phone.viewport.height);
  const cls_ctx = cls_canvas.getContext('2d');
  const lcp_canvas = createCanvas(phone.viewport.width, phone.viewport.height);
  const lcp_ctx = lcp_canvas.getContext('2d');

  const browser = await puppeteer.launch({
    args: ['--no-sandbox'],
    //headless: false,
    //executablePath: '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
    timeout: 10000
  });

  try {
    const page = await browser.newPage();
    //await page.evaluateOnNewDocument(injectJs);
    //phone.userAgent = agent;
    await page.emulate(phone);
    const client = await page.target().createCDPSession();
    await client.send('Network.enable');
    await client.send('ServiceWorker.enable');
    //await client.send('Network.emulateNetworkConditions', Good3G);
    //await client.send('Emulation.setCPUThrottlingRate', { rate: 4 });

    const linkSelector = "a[href*='" + hostname + "']";

    while (processedCount < max_urls && queue.length > 0) {
      shuffle(queue);
      const url = queue.pop();
      processedCount += 1;
      console.log("Processing: " + url);
      try {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000});
        //page.on('console', consoleObj => console.log(consoleObj.text()));
        await sleep(2000); // let's give it a bit more time, to be sure everything's loaded

        // Get links and append. Reading all hrefs in one evaluation avoids a
        // protocol round trip (and a leaked element handle) per link.
        const hrefs = await page.$$eval(linkSelector, (anchors) => anchors.map((a) => a.href));
        for (const href of hrefs) {
          const link = normalizeCrawlLink(href, hostname);
          if (!link || seen.has(link)) continue;
          seen.add(link);
          queue.push(link);
        }

        // inject a function with the code from https://web.dev/cls/#measure-cls-in-javascript
        console.log("Injecting JS...");
        await withTimeout(page.evaluate(injectJs), 5000);
        // Give the observers time to deliver their buffered entries.
        await sleep(2000);

        console.log("Gathering data...");
        const url_results = await withTimeout(
          page.evaluate(function() {return {'cls': window.cls, 'lcp': window.lcp}}),
          5000
        );
        if (!url_results) {
          console.log("Couldn't retrieve results.");
          continue;
        }
        const cls = url_results.cls;
        const lcp = url_results.lcp;
        const lcp_elem = await page.$(".lcp_elem");
        const cls_elems = await page.$$(".cls_elem");

        console.log("Getting screenshot...");
        const screenshot_path = "output/images/" + url.replace(/[^a-zA-Z0-9]/gi, "") + ".jpeg";
        try {
          await page.screenshot({path: screenshot_path, type: "jpeg", "quality": 30});
        } catch(e) {console.log("Can't take screenshot: " + e.message)}

        //adding to lcp heatmap
        if (lcp_elem) {
          // boundingBox() resolves to null when the element is not visible.
          const box = await lcp_elem.boundingBox();
          if (box) {
            const alpha = 3.0 / max_urls;
            lcp_ctx.fillStyle = 'rgba(255, 0, 0, ' + alpha + ')';
            lcp_ctx.fillRect(box.x, box.y, box.width, box.height);
          }
        }

        //adding to cls heatmap
        for (const cls_elem of cls_elems) {
          const box = await cls_elem.boundingBox();
          if (!box) continue;
          const alpha = 1.0 / max_urls;
          cls_ctx.fillStyle = 'rgba(0, 255, 0, ' + alpha + ')';
          cls_ctx.fillRect(box.x, box.y, box.width, box.height);
        }

        results.items.push({url: url, lcp: lcp.value, cls: cls.value, lcpTagName: lcp.tagName, lcpClasses: lcp.classes, lcpPath: lcp.path, screenshot: screenshot_path});
      } catch (error) {
        // A failing page must not abort the whole crawl.
        console.log(error);
      }
    }
  } finally {
    // Always shut the browser down, even if setup or the crawl threw.
    await browser.close();
  }

  // write out lcp heatmap
  fs.writeFileSync("output/lcp_heatmap.png", lcp_canvas.toBuffer());
  results.lcpHeatmap = "lcp_heatmap.png";
  // write out cls heatmap
  fs.writeFileSync("output/cls_heatmap.png", cls_canvas.toBuffer());
  results.clsHeatmap = "cls_heatmap.png";
  // write out result html
  const rendered = mustache.render(TEMPLATE, results);
  fs.writeFileSync('output/index.html', rendered);
}
// Command-line entry point: node <script> <domain> [maxUrls]
const myArgs = process.argv.slice(2);
if (myArgs.length === 0) {
  console.log("Please give a domain to crawl as argument!");
  // Signal the usage error to the calling shell.
  process.exitCode = 1;
}
else {
  let domain = myArgs[0];
  // Only skip the prefix when a real scheme is present; a plain
  // startsWith("http") would misfire on hosts such as "httpbin.org".
  if (!/^https?:\/\//i.test(domain)) domain = "https://" + domain;
  console.log("Crawling domain: " + domain);
  let max = MAX_URLS;
  if (myArgs.length >= 2) {
    const parsedMax = Number.parseInt(myArgs[1], 10);
    if (Number.isInteger(parsedMax) && parsedMax > 0) {
      max = parsedMax;
    } else {
      // A NaN limit would make the crawl loop exit immediately.
      console.log("Ignoring invalid url limit '" + myArgs[1] + "', using " + MAX_URLS);
    }
  }
  doBatch(domain, max)
    .then(() => {console.log("Done!");process.exit(0);})
    .catch((err) => {
      // Surface crawl failures instead of leaving an unhandled rejection.
      console.error(err);
      process.exit(1);
    });
}
<!DOCTYPE html>
<!-- Mustache template for the crawl report. Expected view: domain, lcpHeatmap,
     clsHeatmap and items[] with url, lcp, cls, lcpTagName, lcpClasses, lcpPath
     and screenshot. -->
<html>
<head>
<meta charset="utf-8">
<title>Analysis results for domain {{domain}}</title>
<style>
#mainTable tr:nth-child(odd){
background-color: lightblue;
}
thead {
font-weight: bold;
background-color: lightgrey;
}
.screenshot {
margin:20px;
width:150px;
}
.heatmap {
width:400px;
}
td {
word-break:break-all;
width: 16%;
}
</style>
</head>
<body>
<h1>Analysis results for domain {{domain}}</h1>
<h2>Heatmaps</h2>
<table>
<thead>
<tr>
<td>LCP Heatmap</td>
<td>CLS Heatmap</td>
</tr>
</thead>
<tbody>
<tr>
<td><img src="{{ lcpHeatmap }}" class="heatmap" alt="LCP heatmap"></td>
<td><img src="{{ clsHeatmap }}" class="heatmap" alt="CLS heatmap"></td>
</tr>
</tbody>
</table>
<h2>Detailed Results</h2>
<table>
<thead>
<tr>
<td width="10%">URL</td>
<td>LCP</td>
<td>CLS</td>
<td>LCP Tag Name</td>
<td>LCP Classes</td>
<td>LCP DOM Path</td>
<td>Screenshot</td>
</tr>
</thead>
<tbody id="mainTable">
{{#items}}
<tr>
<td><a href="{{url}}">{{url}}</a></td>
<td>{{lcp}}</td>
<td>{{cls}}</td>
<td>{{lcpTagName}}</td>
<td>{{lcpClasses}}</td>
<td>{{lcpPath}}</td>
<!-- screenshot paths are relative to the directory above output/ -->
<td><img src="../{{screenshot}}" class="screenshot" alt="Screenshot of {{url}}"></td>
</tr>
{{/items}}
</tbody>
</table>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment