Skip to content

Instantly share code, notes, and snippets.

@martinschierle
Created August 20, 2020 07:48
Show Gist options
  • Save martinschierle/a549219714a4b72384b36f72f0aff805 to your computer and use it in GitHub Desktop.
Save martinschierle/a549219714a4b72384b36f72f0aff805 to your computer and use it in GitHub Desktop.
Crawler to crawl for layout shift sources
const puppeteer = require('puppeteer');
const { createCanvas, loadImage } = require('canvas')
const mustache = require('mustache')
var fs = require('fs');
const fsExtra = require('fs-extra')
// Intended cap on the number of URLs per run.
// NOTE(review): nothing in the visible code reads this constant; the entry
// point passes its own limit to doBatch. Confirm whether it is still needed.
let MAX_URLS = 50;
// Mustache template for the HTML report, loaded synchronously at startup.
// The relative path means the script must be started from the directory that
// contains template.html.
let TEMPLATE = fs.readFileSync('template.html', 'utf8');
// using googlebot user agent might get rid of some cookie alerts
//const agent = "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
// Network throttling profile handed to the CDP command
// 'Network.emulateNetworkConditions' in doBatch.
// NOTE(review): per the CDP documentation throughput is in bytes/second and
// latency in milliseconds, which makes this 1.5 Mbit/s down, 750 kbit/s up,
// 40 ms latency. Confirm against the protocol reference.
const Good3G = {
'offline': false,
'downloadThroughput': 1.5 * 1024 * 1024 / 8,
'uploadThroughput': 750 * 1024 / 8,
'latency': 40
};
// Puppeteer's built-in device descriptor used for page.emulate() in doBatch.
const phone = puppeteer.devices['Nexus 5X'];
/**
 * In-page probe that attributes layout shifts to recently loaded resources.
 *
 * This function is handed to `page.evaluate`, so it runs inside the crawled
 * page and must stay fully self-contained: every helper it needs is declared
 * inside its own body and it only touches browser globals.
 *
 * It resets `window.shifts` to an empty array and registers a
 * PerformanceObserver for (buffered) `layout-shift` entries. For each entry it
 * looks at resources that finished loading shortly before the shift and, when
 * a resized source node plausibly belongs to such a resource, appends a record
 * `{url, cause, ressource, impact, timegap}` to `window.shifts`, where `cause`
 * is "FONT", "IMAGE" or "ADS", `impact` is the entry's `value` and `timegap`
 * is the time in ms between the resource's `responseEnd` and the shift.
 * (`ressource` keeps its historical spelling because the report template
 * reads that key.)
 */
function injectJs() {
  window.shifts = [];

  // How long before a shift (ms) a resource may have finished and still be blamed.
  const FONT_WINDOW_MS = 150;
  const IMAGE_WINDOW_MS = 150;
  const AD_WINDOW_MS = 400;

  const FONT_PATTERN = /(woff)|(ttf)/;
  const AD_PATTERN = /\/ads\?/;
  // The alternation is grouped so the leading dot applies to every extension;
  // the previous pattern matched any URL that merely contained "png", "gif", ...
  const IMAGE_PATTERN = /\.(?:jpe?g|png|gif|svg|webp)\b/i;

  const FONT_TAGS = ["P", "I", "A"];
  const IMAGE_TAGS = ["IMG", "PICTURE", "DIV"];
  const AD_TAGS = ["DIV", "SPAN", "IFRAME", "SECTION"];

  // True when the source's width or height differs between its previous and current rect.
  function didSizeChange(src) {
    return src.previousRect.width !== src.currentRect.width ||
      src.previousRect.height !== src.currentRect.height;
  }

  // Resource timing entries whose responseEnd lies strictly inside
  // (startTime, endTime) and whose URL matches `regex` (if one is given).
  function getLastResources(startTime, endTime, regex) {
    return performance.getEntriesByType('resource').filter((e) => {
      if (regex && !e.name.match(regex)) return false;
      return e.responseEnd < endTime && e.responseEnd > startTime;
    });
  }

  // Sources of a shift that changed size and still have a DOM node attached.
  // `node` is null when the element has been removed, and `sources` may be
  // missing altogether, so both are guarded instead of throwing in the callback.
  function resizedSources(entry) {
    return (entry.sources || []).filter((src) => src.node && didSizeChange(src));
  }

  // Last path segment of a resource URL, or '' when there is none / the URL is unparsable.
  function fileNameOf(resourceUrl) {
    try {
      return new URL(resourceUrl).pathname.split('/').pop();
    } catch (e) {
      return '';
    }
  }

  // Stores one attribution record and logs it.
  function record(cause, resource, entry) {
    const diff = entry.startTime - resource.responseEnd;
    window.shifts.push({url: document.location.href, cause: cause, ressource: resource.name, impact: entry.value, timegap: diff});
    console.log(resource.name + " was loaded " + diff + "ms before a layout shift with impact " + entry.value);
  }

  const po = new PerformanceObserver((list) => {
    for (const entry of list.getEntries()) {
      const lastFonts = getLastResources(entry.startTime - FONT_WINDOW_MS, entry.startTime, FONT_PATTERN);
      const lastAds = getLastResources(entry.startTime - AD_WINDOW_MS, entry.startTime, AD_PATTERN);
      const lastImgs = getLastResources(entry.startTime - IMAGE_WINDOW_MS, entry.startTime, IMAGE_PATTERN);
      const resized = resizedSources(entry);

      // if we have a text node which changed size, and a font loaded, then attribute the layout jump to the font
      // not completely true, could also be due to CSS
      if (lastFonts.length > 0) {
        console.log("Probing for fonts:");
        console.log(lastFonts);
        console.log(entry);
        const last = lastFonts[lastFonts.length - 1];
        for (const src of resized) {
          // is this source just an icon or text node?
          if (src.node.nodeType === Node.TEXT_NODE || FONT_TAGS.includes(src.node.tagName)) {
            record("FONT", last, entry);
          }
        }
      }

      // if an image was loaded, and the size of an image node with same src changed, attribute shift to the image
      if (lastImgs.length > 0) {
        console.log("Probing for images:");
        console.log(lastImgs);
        console.log(entry);
        for (const src of resized) {
          console.log("Probing for tagname:");
          if (!IMAGE_TAGS.includes(src.node.tagName)) continue;
          // find the first recently loaded image whose file name appears in the node's markup
          const match = lastImgs.find((img) => {
            const filename = fileNameOf(img.name);
            console.log("Probing for filename: " + filename);
            // An empty file name (URL path ending in "/") would match any markup.
            return filename !== '' && src.node.outerHTML.indexOf(filename) >= 0;
          });
          if (match) record("IMAGE", match, entry);
        }
      }

      // if an ad was loaded, and the size of a container holding an ad iframe changed, attribute shift to the ad
      if (lastAds.length > 0) {
        console.log("Probing for ads:");
        console.log(lastAds);
        console.log(entry);
        const last = lastAds[lastAds.length - 1];
        for (const src of resized) {
          console.log("Probing tagname");
          if (!AD_TAGS.includes(src.node.tagName)) continue;
          console.log("Probing content");
          if (src.node.outerHTML.indexOf("google_ads_iframe") >= 0) {
            record("ADS", last, entry);
          }
        }
      }
    }
  });
  po.observe({type: 'layout-shift', buffered: true});
}
/** Resolves (with undefined) after `ms` milliseconds. */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Resolves with the value of `promise`, or with undefined if it has not
 * settled within `ms` milliseconds. The timer is cleared once the race is
 * decided so it cannot keep the process alive.
 */
function withTimeout(promise, ms) {
  let timer;
  const timeout = new Promise((resolve) => {
    timer = setTimeout(resolve, ms);
  });
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}

/**
 * Crawls up to `max` of the given URLs on an emulated, network-throttled
 * phone, collects the layout-shift attributions produced by `injectJs` on
 * each page and writes them, sorted by descending impact, to
 * `output/index.html` using the Mustache `TEMPLATE`.
 *
 * The `output` directory is emptied first. A failure on a single URL is
 * logged and the crawl continues with the next one.
 *
 * @param {string[]} urls - Page URLs to visit, in order.
 * @param {number} max - Upper bound on how many entries of `urls` are visited.
 * @returns {Promise<void>} Resolves once the report has been written.
 */
async function doBatch(urls, max) {
  // reset output file and images dir
  fsExtra.emptyDirSync("output");
  fs.mkdirSync("output/images");

  // json results object to write towards mustache at the end
  let results = [];

  const browser = await puppeteer.launch({
    args: ['--no-sandbox'],
    timeout: 10000
  });

  try {
    const page = await browser.newPage();
    await page.emulate(phone);

    const client = await page.target().createCDPSession();
    await client.send('Network.enable');
    await client.send('ServiceWorker.enable');
    await client.send('Network.emulateNetworkConditions', Good3G);

    const limit = Math.min(max, urls.length);
    for (let k = 0; k < limit; k++) {
      const url = urls[k];
      console.log("Processing: " + url);
      try {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000});
        await delay(2000); // let's give it a bit more time, to be sure everything's loaded

        // inject a function with the code from https://web.dev/cls/#measure-cls-in-javascript
        console.log("Injecting JS...");
        await withTimeout(page.evaluate(injectJs), 5000);

        // Give the buffered layout-shift entries time to be delivered to the
        // observer. This wait used to be fired without `await` and so never
        // actually delayed the read below.
        await delay(2000);

        console.log("Gathering data...");
        const url_results = await withTimeout(
          page.evaluate(function() {return window.shifts}),
          5000
        );
        if (!url_results) {
          console.log("Couldn't retrieve results.");
          continue;
        }
        results = results.concat(url_results);
      } catch (error) {
        // Best effort: one broken page must not abort the whole batch.
        console.log(error);
      }
    }
  } finally {
    // Always release the browser process, even when setup or crawling throws.
    await browser.close();
  }

  // write out result html, biggest shifts first
  results.sort((a, b) => b.impact - a.impact);
  const rendered = mustache.render(TEMPLATE, {items: results});
  fs.writeFileSync('output/index.html', rendered);
}
// Entry point: read one URL per line from input.csv and crawl them.
// Lines are split on LF or CRLF and trimmed; blank lines (e.g. the trailing
// newline at the end of the file) are dropped so they are not visited as URLs.
const urls = fs.readFileSync('input.csv', 'utf8')
  .split(/\r?\n/)
  .map((line) => line.trim())
  .filter((line) => line.length > 0);

doBatch(urls, 200)
  .then(() => {
    console.log("Done!");
    process.exit(0);
  })
  .catch((error) => {
    // Without this handler a failed launch/crawl was an unhandled rejection.
    console.error(error);
    process.exit(1);
  });
{{! Report template rendered by the crawler with mustache: `items` is the list of attributed layout shifts, each with url, cause, ressource, impact and timegap. }}
<html>
<head>
<style>
#mainTable tr:nth-child(odd){
background-color: lightblue;
}
thead {
font-weight: bold;
background-color: lightgrey;
}
.screenshot {
margin:20px;
width:150px;
}
.heatmap {
width:400px;
}
td {
word-break:break-all;
width: 16%;
}
</style>
</head>
<!-- The former onload="renderHello()" handler was removed: no such function is defined in this page, so it raised a ReferenceError on every load. -->
<body>
<h1>Analysis results for Font-related layout shifts</h1>
<table>
<thead>
<tr>
<td width="10%">URL</td>
<td>Cause</td>
<td>Ressource</td>
<td>CLS Impact</td>
<td>Timegap</td>
</tr>
</thead>
<tbody id="mainTable">
{{#items}}
<tr>
<td>{{url}}</td>
<td>{{cause}}</td>
<td>{{ressource}}</td>
<td>{{impact}}</td>
<td>{{timegap}}</td>
</tr>
{{/items}}
</tbody>
</table>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment