Last active
February 6, 2024 15:19
-
-
Save dnnsmnstrr/cd7b56d024f1a387676076612989d26d to your computer and use it in GitHub Desktop.
Wiki Table Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Browser-console snippet: scrapes the first `.wikitable` on the current page
 * and logs the result as a pretty-printed JSON string.
 *
 * The first table row is skipped as a header row. Each remaining row is
 * expected to hold at least five `<td>` cells, read in this order:
 * title, artist, year, progression, recorded key. Rows with fewer cells are
 * dropped.
 *
 * Each emitted record has the shape:
 *   { title, artist, year, progression, recordedKey, wikiUrl, ultimateGuitarUrl }
 *
 * `year` is parsed with radix 10; an unparseable year is NaN, which
 * JSON.stringify emits as `null`. `wikiUrl` and `ultimateGuitarUrl` are empty
 * strings when no matching link is found in the title cell.
 */
(function() {
  const WIKI_ORIGIN = 'https://en.wikipedia.org';
  // Five columns are read below (indices 0-4), so shorter rows must be skipped.
  const REQUIRED_CELLS = 5;

  if (typeof document === 'undefined') {
    console.log('No DOM available; run this in a browser console.');
    return;
  }

  const table = document.querySelector('.wikitable'); // Adjust the selector if needed
  if (!table) {
    console.log('Table not found.');
    return;
  }

  const bodyRows = Array.from(table.querySelectorAll('tr')).slice(1); // Drop the header row
  const data = bodyRows.map((row) => {
    const cells = row.querySelectorAll('td');
    if (cells.length < REQUIRED_CELLS) {
      return null; // Skip malformed rows
    }
    const [titleCell, artistCell, yearCell, progressionCell, keyCell] = cells;

    // Remove text in square brackets, then trim again so that stripping a
    // trailing "[1]"-style marker does not leave dangling whitespace.
    const title = titleCell.innerText.replace(/\[.*?\]/g, '').trim();
    const artist = artistCell.innerText.trim();
    const year = Number.parseInt(yearCell.innerText.trim(), 10);
    // Remove text in parentheses.
    const progression = progressionCell.innerText.replace(/\(.*?\)/g, '').trim();
    const recordedKey = keyCell.innerText.trim();

    // Resolve the first link in the title cell against the wiki origin so that
    // relative, protocol-relative and absolute hrefs all yield a valid URL.
    // Anchors without an href attribute are ignored.
    const titleLink = titleCell.querySelector('a[href]');
    const wikiUrl = titleLink
      ? new URL(titleLink.getAttribute('href'), WIKI_ORIGIN).href
      : '';

    // Check every nofollow link in the title cell, not only the first one.
    const guitarLink = Array.from(titleCell.querySelectorAll('a[rel="nofollow"]'))
      .find((link) => link.href.includes('ultimate-guitar.com'));
    const ultimateGuitarUrl = guitarLink ? guitarLink.href : '';

    return {
      title,
      artist,
      year,
      progression,
      recordedKey,
      wikiUrl,
      ultimateGuitarUrl
    };
  }).filter(Boolean); // Remove nulls for any skipped rows

  console.log(JSON.stringify(data, null, 2));
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This can be run directly in the browser console and will generate a JSON string. I used it to quickly get the data from this page for a project I was working on. The script was mainly written by ChatGPT.