-
-
Save andy-blum/d6d196ed69d9565f24e42198c4628e35 to your computer and use it in GitHub Desktop.
Web Sustainability Guidelines Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Web Sustainability Guidelines scraper.
 *
 * Meant to be run in a browser (e.g. pasted into the dev-tools console) on a
 * page whose table of contents lives under `#toc`. It walks the TOC, collects
 * data for every guideline entry, then:
 *   - prints one collapsed console group per chapter containing a table of
 *     the scraped records, and
 *   - logs a single HTML string with one `<h2>` + `<table>` per chapter.
 *
 * All scraped text is HTML-escaped before being placed into the generated
 * markup, because it is read through `textContent` (i.e. it is plain text and
 * may legitimately contain `&`, `<`, `>` or quotes).
 */
(() => {
  const BASE_URL = 'https://w3c.github.io/sustyweb';

  // Number of leading characters dropped from a "Success Criterion…" heading
  // to obtain the criterion's own title. "Success Criterion" itself is 17
  // characters, so this also drops three more characters.
  // NOTE(review): presumably a separator such as " - "; confirm against the
  // page's actual headings before changing it.
  const CRITERION_PREFIX_LENGTH = 20;

  const HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };

  /**
   * Escapes the characters that are significant in HTML text and attributes.
   * @param {unknown} value - Value to embed in markup; coerced to a string.
   * @returns {string} The escaped text.
   */
  const escapeHTML = (value) =>
    String(value).replace(/[&<>"']/g, (char) => HTML_ESCAPES[char]);

  /**
   * Decides whether a TOC link points at a guideline.
   * @param {Element} item - A `.tocxref` element from the TOC.
   * @param {Element|null} tocRoot - The top-level `ol.toc` list.
   * @returns {boolean} True when the entry is a guideline.
   */
  const isGuideline = (item, tocRoot) => {
    // Chapter number - e.g. "1.2.1 Principles" => 1
    const chapter = Number.parseInt(item.textContent.split('.')[0], 10);
    return (
      !Number.isNaN(chapter) && // Some TOC titles don't have numbers
      chapter > 1 && // Chapter 1 is an intro
      chapter < 6 && // Chapter 6 is a glossary
      // Entries sitting directly in the root list are chapter headings.
      item.parentElement.parentElement !== tocRoot
    );
  };

  /**
   * Splits a benefit list item into its bolded title and trailing description.
   * @param {Element} benefit - An `li` inside `ul.benefits`.
   * @returns {{title: string, description: string}}
   */
  const scrapeBenefit = (benefit) => {
    // The last character of the <strong> text is dropped from the title.
    const title = benefit.querySelector('strong').textContent.slice(0, -1);
    const description = benefit.textContent.substring(title.length + 1).trim();
    return { title, description };
  };

  /**
   * Collects the data for one guideline from its TOC entry and the section
   * the entry links to.
   * @param {Element} item - The guideline's `.tocxref` TOC link.
   * @returns {object} Record with link, number, title, description, impact,
   *   effort, benefits and success_criterion.
   */
  const scrapeGuideline = (item) => {
    const number = item.querySelector('.secno').textContent.trim();
    const title = item.textContent.substring(number.length).trim();
    const href = item.getAttribute('href');
    const content = document.querySelector(href);

    const itemData = {
      link: `${BASE_URL}/${href}`,
      number,
      title,
      description: content.querySelector('p').textContent,
      impact: '',
      effort: '',
      benefits: [],
      success_criterion: [],
    };

    for (const section of content.querySelectorAll('section')) {
      const heading = section.querySelector('h4');
      // querySelectorAll returns every descendant section; one without an
      // <h4> carries nothing this script classifies, so skip it instead of
      // throwing on `null.textContent`.
      if (heading === null) {
        continue;
      }
      const headingText = heading.textContent;

      if (headingText.startsWith('Success Criterion')) {
        itemData.success_criterion.push({
          title: headingText.substring(CRITERION_PREFIX_LENGTH),
          explanation: [...section.querySelectorAll('p')]
            .map((p) => p.textContent)
            .join(' '),
        });
      } else if (headingText.startsWith('Impact & Effort')) {
        // Only the first character of each <dd> is kept.
        itemData.impact = section
          .querySelector(':nth-child(1 of dd)')
          .textContent.substring(0, 1);
        itemData.effort = section
          .querySelector(':nth-child(2 of dd)')
          .textContent.substring(0, 1);
      } else if (headingText.startsWith('Benefits')) {
        itemData.benefits = [
          ...section.querySelectorAll('ul.benefits li'),
        ].map(scrapeBenefit);
      }
    }

    return itemData;
  };

  /**
   * Renders `<li><details>` entries for a list of `{title, <bodyKey>}` records.
   * @param {object[]} entries - Records to render.
   * @param {string} bodyKey - Property holding the collapsible body text.
   * @returns {string} Concatenated list-item markup.
   */
  const renderDetailsItems = (entries, bodyKey) =>
    entries
      .map(
        (entry) =>
          `<li><details><summary>${escapeHTML(entry.title)}:</summary>${escapeHTML(entry[bodyKey])}</details></li>`,
      )
      .join('');

  /**
   * Renders one guideline record as a table row.
   * @param {object} rule - Record produced by `scrapeGuideline`.
   * @returns {string} `<tr>…</tr>` markup.
   */
  const renderRule = (rule) =>
    [
      '<tr>',
      '<td>',
      `<a href="${escapeHTML(rule.link)}">${escapeHTML(rule.number)}</a>`,
      `<br/>(${escapeHTML(rule.impact)}/${escapeHTML(rule.effort)})`,
      '</td>',
      `<td><details><summary>${escapeHTML(rule.title)}</summary>${escapeHTML(rule.description)}</details></td>`,
      '<td>',
      '<ul>',
      renderDetailsItems(rule.benefits, 'description'),
      '</ul>',
      '</td>',
      '<td>',
      '<ul>',
      renderDetailsItems(rule.success_criterion, 'explanation'),
      '</ul>',
      '</td>',
      '</tr>',
    ].join('\n');

  /**
   * Renders a chapter heading followed by a table of its guidelines.
   * @param {string} sectionTitle - Chapter TOC text, e.g. "2. Some Chapter".
   * @param {object[]} rules - Guideline records belonging to the chapter.
   * @returns {string} `<h2>` + `<table>` markup.
   */
  const renderSection = (sectionTitle, rules) => {
    // Strip only the leading "<number>." so titles containing further periods
    // are kept whole and titles without a number do not throw.
    const heading = sectionTitle.replace(/^\s*\d+\.\s*/, '').trim();
    return [
      `<h2>${escapeHTML(heading)}</h2>`,
      '<table>',
      '<thead>',
      '<tr>',
      '<th># (I/E)</th>',
      '<th>Guideline</th>',
      '<th>Benefits</th>',
      '<th>Success Criterion</th>',
      '</tr>',
      '</thead>',
      '<tbody>',
      rules.map(renderRule).join(''),
      '</tbody>',
      '</table>',
    ].join('\n');
  };

  const tocItems = [...document.querySelectorAll('#toc .tocxref')];
  const tocRoot = document.querySelector('#toc > ol.toc');

  // Chapter title => guideline records, in TOC order. A Map keeps insertion
  // order and is safe for arbitrary page-supplied keys.
  const guidelineData = new Map();

  for (const item of tocItems) {
    if (!isGuideline(item, tocRoot)) {
      continue;
    }
    // The element right before the enclosing `.toc` list is used as the
    // chapter label for grouping.
    const sectionTitle = item.closest('.toc').previousElementSibling.textContent;
    if (!guidelineData.has(sectionTitle)) {
      guidelineData.set(sectionTitle, []);
    }
    guidelineData.get(sectionTitle).push(scrapeGuideline(item));
  }

  const markup = [];
  for (const [sectionTitle, rules] of guidelineData) {
    markup.push(renderSection(sectionTitle, rules));

    console.groupCollapsed(sectionTitle);
    console.table(rules);
    console.groupEnd();
  }

  console.log(markup.join(''));
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment