Skip to content

Instantly share code, notes, and snippets.

@andy-blum
Created September 8, 2023 18:42
Show Gist options
  • Save andy-blum/d6d196ed69d9565f24e42198c4628e35 to your computer and use it in GitHub Desktop.
Save andy-blum/d6d196ed69d9565f24e42198c4628e35 to your computer and use it in GitHub Desktop.
Web Sustainability Guidelines Scraper
/**
 * Web Sustainability Guidelines scraper.
 *
 * Walks the table of contents (`#toc`) of the currently loaded document,
 * collects every guideline entry in chapters 2-5, reads the linked section
 * for its description, impact/effort ratings, benefits and success criteria,
 * and then:
 *   - logs one collapsed console group per chapter containing a
 *     `console.table` of the scraped data, and
 *   - logs a single HTML string with one <h2> + <table> per chapter.
 *
 * NOTE(review): presumably meant to be pasted into the DevTools console while
 * the rendered spec at https://w3c.github.io/sustyweb is open - confirm.
 * Everything lives inside an IIFE so no names leak into the page's globals.
 */
(() => {
  const BASE_URL = 'https://w3c.github.io/sustyweb';

  // Criterion headings are cut after this many characters to get the bare
  // name; the prefix is 20 characters long (the original used substring(20)).
  const SUCCESS_CRITERION_PREFIX = 'Success Criterion - ';

  const HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };

  /**
   * Escapes text for safe inclusion in HTML content or a quoted attribute.
   * All scraped values come from `textContent`, i.e. they are plain text and
   * must be re-escaped before being placed back into markup.
   *
   * @param {unknown} value - Value to escape (stringified first).
   * @returns {string} HTML-escaped text.
   */
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  /**
   * Removes a leading section number such as "2. " or "2.1 " from a heading.
   * Unlike splitting on '.', this keeps headings that contain further periods
   * intact and does not throw when the heading has no number at all.
   *
   * @param {string} heading - Raw heading text, e.g. "2. User-Experience Design".
   * @returns {string} Heading without its number, trimmed.
   */
  const stripSectionNumber = (heading) =>
    heading.replace(/^\s*\d+(?:\.\d+)*\.?\s*/, '').trim();

  /**
   * Returns the text of the first descendant matching `selector`, or '' when
   * nothing matches.
   *
   * @param {ParentNode} root - Node to search in.
   * @param {string} selector - CSS selector.
   * @returns {string} The element's textContent or an empty string.
   */
  const textOf = (root, selector) =>
    root.querySelector(selector)?.textContent ?? '';

  const tocRoot = document.querySelector('#toc > ol.toc');
  const tocItems = [...document.querySelectorAll('#toc .tocxref')];

  /**
   * Decides whether a TOC link points at a guideline.
   *
   * @param {Element} item - A `.tocxref` link from the table of contents.
   * @returns {boolean} True for non-top-level entries of chapters 2-5.
   */
  const isGuideline = (item) => {
    // Chapter number - e.g. "1.2.1 Principles" => 1. parseInt stops at the
    // first non-digit, so the first '.' ends the number.
    const chapter = Number.parseInt(item.textContent, 10);
    return (
      !Number.isNaN(chapter) && // Some TOC titles don't have numbers
      chapter > 1 && // Chapter 1 is an intro
      chapter < 6 && // Chapter 6 is a glossary
      item.parentElement.parentElement !== tocRoot // skip chapter-level links
    );
  };

  /**
   * Splits a benefits list item into its bold label and the remaining text.
   *
   * @param {Element} li - An `li` from `ul.benefits`.
   * @returns {{title: string, description: string}} Parsed benefit.
   */
  const parseBenefit = (li) => {
    const fullText = li.textContent.trim();
    const strong = li.querySelector('strong');
    if (!strong) {
      return { title: '', description: fullText };
    }
    const label = strong.textContent.trim();
    const rest = fullText.startsWith(label)
      ? fullText.slice(label.length)
      : fullText;
    return {
      // The label carries a trailing colon that is re-added when rendering.
      title: label.replace(/:$/, ''),
      description: rest.replace(/^\s*:\s*/, '').trim(),
    };
  };

  /**
   * Reads one guideline from the document.
   *
   * @param {Element} item - TOC link of the guideline.
   * @returns {object|null} Scraped guideline data, or null when the section
   *   the link points to cannot be found.
   */
  const scrapeGuideline = (item) => {
    const number = textOf(item, '.secno').trim();
    const href = item.getAttribute('href') ?? '';
    // Look the target up by id instead of using the href as a CSS selector,
    // which throws for ids that are not valid CSS identifiers.
    const content = document.getElementById(href.slice(1));
    if (!content) {
      console.warn(`Skipping ${number}: no element found for "${href}"`);
      return null;
    }

    const guideline = {
      link: `${BASE_URL}/${href}`,
      number,
      title: item.textContent.substring(number.length).trim(),
      description: textOf(content, 'p'),
      impact: '',
      effort: '',
      benefits: [],
      success_criterion: [],
    };

    for (const section of content.querySelectorAll('section')) {
      const heading = textOf(section, 'h4').trim();

      if (heading.startsWith('Success Criterion')) {
        guideline.success_criterion.push({
          title: heading.substring(SUCCESS_CRITERION_PREFIX.length),
          explanation: [...section.querySelectorAll('p')]
            .map((p) => p.textContent)
            .join(' '),
        });
      } else if (heading.startsWith('Impact & Effort')) {
        // Only the first letter of each rating is kept.
        guideline.impact = textOf(section, ':nth-child(1 of dd)').trim().charAt(0);
        guideline.effort = textOf(section, ':nth-child(2 of dd)').trim().charAt(0);
      } else if (heading.startsWith('Benefits')) {
        guideline.benefits = [
          ...section.querySelectorAll('ul.benefits li'),
        ].map(parseBenefit);
      }
      // Sections with any other (or no) h4 heading are ignored.
    }

    return guideline;
  };

  /**
   * Renders a collapsible list item: "<summary>label:</summary>body".
   *
   * @param {string} label - Summary text (a colon is appended).
   * @param {string} body - Text revealed when expanded.
   * @returns {string} HTML for one `<li>`.
   */
  const renderDetailsItem = (label, body) =>
    `<li><details><summary>${escapeHtml(label)}:</summary>${escapeHtml(body)}</details></li>`;

  /**
   * Renders one guideline as a table row.
   *
   * @param {object} rule - Object produced by `scrapeGuideline`.
   * @returns {string} HTML for one `<tr>`.
   */
  const renderRule = (rule) => {
    const row = `
<tr>
<td>
<a href="${escapeHtml(rule.link)}">${escapeHtml(rule.number)}</a>
<br/>(${escapeHtml(rule.impact)}/${escapeHtml(rule.effort)})
</td>
<td><details><summary>${escapeHtml(rule.title)}</summary>${escapeHtml(rule.description)}</details></td>
<td>
<ul>
${rule.benefits.map((b) => renderDetailsItem(b.title, b.description)).join('')}
</ul>
</td>
<td>
<ul>
${rule.success_criterion.map((c) => renderDetailsItem(c.title, c.explanation)).join('')}
</ul>
</td>
</tr>
`;
    return row.trim();
  };

  /**
   * Renders a chapter heading followed by a table of its guidelines.
   *
   * @param {string} sectionTitle - Chapter title as found in the TOC.
   * @param {object[]} rules - Guidelines belonging to the chapter.
   * @returns {string} HTML for the `<h2>` and `<table>`.
   */
  const renderSection = (sectionTitle, rules) => {
    const section = `
<h2>${escapeHtml(stripSectionNumber(sectionTitle))}</h2>
<table>
<thead>
<tr>
<th># (I/E)</th>
<th>Guideline</th>
<th>Benefits</th>
<th>Success Criterion</th>
</tr>
</thead>
<tbody>
${rules.map(renderRule).join('')}
</tbody>
</table>`;
    return section.trim();
  };

  // Group guidelines by the title of the TOC entry that owns their sub-list.
  // A Map keeps insertion order and cannot collide with Object.prototype keys.
  const guidelinesBySection = new Map();
  for (const item of tocItems.filter(isGuideline)) {
    const guideline = scrapeGuideline(item);
    if (!guideline) {
      continue;
    }
    // The nearest enclosing `.toc` list is preceded by the parent entry's link.
    const sectionTitle = item.closest('.toc').previousElementSibling.textContent;
    if (!guidelinesBySection.has(sectionTitle)) {
      guidelinesBySection.set(sectionTitle, []);
    }
    guidelinesBySection.get(sectionTitle).push(guideline);
  }

  const markup = [];
  for (const [sectionTitle, rules] of guidelinesBySection) {
    markup.push(renderSection(sectionTitle, rules));

    console.groupCollapsed(sectionTitle);
    console.table(rules);
    console.groupEnd();
  }

  console.log(markup.join(''));
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment