Last active
July 5, 2022 22:24
-
-
Save j127/5cde1dae21931017d9251c82b24df3fb to your computer and use it in GitHub Desktop.
Wikipedia cat scraper with images https://www.youtube.com/watch?v=rlv7ueX4Yjc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Cat-breed table scraper, meant to be run in a browser on a page whose first
// <table> lists one breed per row. It collects each breed's name and image URL
// and then replaces the page body with the result rendered as JSON.

/**
 * Normalizes the visible text of a breed header cell.
 *
 * @param {string} rawText - The cell's `innerText`.
 * @returns {string} The text with bracketed markers removed, whitespace runs
 *   collapsed to single spaces, and no leading/trailing whitespace.
 */
function cleanBreedName(rawText) {
    return rawText
        // Remove every bracketed marker (e.g. "[7]") individually. A greedy
        // `/\[.+\]/` would also swallow the text between two markers.
        .replace(/\[[^\]]*\]/g, "")
        // Line breaks inside the cell become ordinary spaces.
        .replace(/\s+/g, " ")
        .trim();
}

/**
 * Extracts breed records from a table element.
 *
 * @param {{querySelectorAll: Function}} table - The table element to read.
 * @returns {Array<{breed: string, img: (string|null)}>} One record per row
 *   that has a `th[scope='row']` cell; `img` is null when the row's last cell
 *   contains no image (or the image has an empty `src`).
 */
function extractCats(table) {
    // The first row is skipped; presumably it holds the column headers.
    const [, ...bodyRows] = table.querySelectorAll("tr");
    const cats = [];

    for (const row of bodyRows) {
        const headerCell = row.querySelector("th[scope='row']");
        // Rows without a row-header cell carry no breed name, so skip them.
        if (!headerCell) {
            continue;
        }

        const img = row.querySelector("td:last-child img");
        cats.push({
            breed: cleanBreedName(headerCell.innerText),
            // `||` on purpose: an empty `src` is reported as null as well.
            img: img?.src || null,
        });
    }

    return cats;
}

/**
 * Scrapes the first table of `doc` and replaces the body with the JSON output.
 *
 * @param {Document} doc - The document to scrape and render into.
 * @returns {Array<{breed: string, img: (string|null)}>} The scraped records.
 * @throws {Error} If the document contains no table.
 */
function renderCatBreeds(doc) {
    const table = doc.querySelector("table");
    if (!table) {
        throw new Error("No <table> found on this page; nothing to scrape.");
    }

    const catArray = extractCats(table);
    const output = JSON.stringify(catArray, null, 4);

    // Assign via textContent rather than innerHTML so that `<` or `&` in the
    // scraped text is displayed literally instead of being parsed as markup.
    const pre = doc.createElement("pre");
    pre.textContent = output;
    doc.body.replaceChildren(pre);

    return catArray;
}

// Only run automatically where a DOM is available (i.e. in a browser).
if (typeof document !== "undefined") {
    renderCatBreeds(document);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment