Created
January 29, 2019 07:53
-
-
Save martinschierle/451d60c07a7cb1ec6d2b390522057afa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Small Puppeteer script that estimates the lines of code used by a website,
 * for use in infographics like this one:
 * https://informationisbeautiful.net/visualizations/million-lines-of-code/
 **/
'use strict'; | |
const puppeteer = require('puppeteer'); | |
const fs = require('fs'); | |
var htmlparser = require("htmlparser2"); | |
/**
 * Estimate the number of source lines of code in a piece of JavaScript text.
 *
 * The estimate is a heuristic: the text is split on every `{`, `}` and `;`
 * and the number of resulting segments is returned, i.e. the number of
 * delimiters plus one.
 *
 * @param {string} str - JavaScript source text.
 * @returns {number} Estimated line count; 0 for an empty or non-string input.
 */
function getSlocOfString(str) {
  // Without this guard "" would split into [""] and be reported as one line,
  // and null/undefined would throw a TypeError.
  if (typeof str !== 'string' || str.length === 0) {
    return 0;
  }
  return str.split(/[{};]/).length;
}
/**
 * Load a page in headless Chrome, estimate the lines of JavaScript it uses
 * (external script responses plus inline <script> text) and, when the total
 * exceeds 100, log a "url,total" CSV row and append it to output.csv.
 *
 * Navigation failures and inline-script evaluation failures are logged and
 * do not abort the measurement; whatever was counted so far is still summed.
 *
 * @param {string} url - Address of the page to analyze.
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function getSloc(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // Line count per external script URL; a repeated URL overwrites its entry.
    const scriptCounts = new Map();
    // Body reads that are still in flight; they must settle before summing.
    const pendingReads = [];

    page.on('response', response => {
      if (response.request().resourceType() !== 'script') {
        return;
      }
      pendingReads.push(
        response.text()
          .then(content => {
            scriptCounts.set(response.url(), getSlocOfString(content));
          })
          .catch(e => console.log(e.message))
      );
    });

    try {
      await page.goto(url);
    } catch (e) {
      console.log("Can't process: " + url);
    }

    // Inline scripts are tracked separately so they cannot collide with a
    // script response that happens to share the page URL.
    let inlineCount = 0;
    try {
      const text = await page.$$eval('script', elements =>
        elements.map(element => element.textContent).join('')
      );
      inlineCount = getSlocOfString(text);
    } catch (e) {
      console.log("Can't get inline sloc for : " + url);
    }

    // Every read has its own catch handler, so this never rejects; it only
    // makes sure the counts below include all script bodies seen so far.
    await Promise.all(pendingReads);

    // count sloc sum
    let sum = inlineCount;
    for (const count of scriptCounts.values()) {
      sum += count;
    }

    // write out result if reasonable
    if (sum > 100) {
      const csv = url + "," + sum + "\n";
      console.log(csv);
      fs.appendFileSync('output.csv', csv);
    }
  } finally {
    // Always release the browser process, even if something above threw.
    await browser.close();
  }
}
// Entry point: reset output.csv, then measure each configured site in turn.
(async () => {
  // Start every run with a fresh output file. A missing file is the normal
  // first-run case; any other failure is reported instead of being hidden.
  try {
    fs.unlinkSync("output.csv");
  } catch (e) {
    if (e.code !== 'ENOENT') {
      console.log("Can't remove old output.csv: " + e.message);
    }
  }
  // sites to analyze
  const sites = [];
  // Sites are processed sequentially, one browser instance at a time.
  for (const site of sites) {
    await getSloc(site);
  }
})().catch(e => {
  // Surface unexpected failures (e.g. the browser could not be launched)
  // rather than leaving an unhandled promise rejection.
  console.error(e);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment