Skip to content

Instantly share code, notes, and snippets.

@jeremiahlangner
Last active September 27, 2017 14:43
Show Gist options
  • Save jeremiahlangner/005759a73f55167ace270ff7186338b6 to your computer and use it in GitHub Desktop.
Save jeremiahlangner/005759a73f55167ace270ff7186338b6 to your computer and use it in GitHub Desktop.
(function() {
  'use strict';

  /*
   * WIP: scrape the tables of a rendered Wikipedia article and copy them to
   * the clipboard as JSON.
   *
   * Meant to be pasted into the browser DevTools console: it reads the page's
   * `document` and hands the result to the console's `copy()` helper.
   *
   * Result shape:
   *   { "<headline text>": [ { "<column header>": "<cell text>", ... }, ... ] }
   *
   * Pairing heuristic: the n-th `.wikitable` element is filed under the n-th
   * `.mw-headline` element. Tables beyond the last headline are dropped, and
   * headlines without a table at their index do not appear in the result.
   * Cells using rowspan/colspan are not expanded.
   */

  var forEach = Array.prototype.forEach;

  // Text of a body cell with line breaks removed. `textContent` is used rather
  // than tag-stripped `innerHTML` so HTML entities arrive already decoded.
  function cellText(cell) {
    return cell.textContent.replace(/\n/g, '');
  }

  // Key for a header cell: its text minus parenthesised notes and line breaks.
  function headerKey(cell) {
    return cell.textContent
      .replace(/ *\([^)]*\) */g, '')
      .replace(/\n/g, '');
  }

  // Turns one table into an array of row objects keyed by the first row's cells.
  function tableToRecords(table) {
    var keys = [];
    var records = [];

    // `table.rows` only holds this table's own rows, not rows of nested tables.
    forEach.call(table.rows, function(row, rowIndex) {
      if (rowIndex === 0) {
        forEach.call(row.cells, function(cell, col) {
          keys[col] = headerKey(cell);
        });
        return;
      }

      var record = {};
      // `row.cells` contains <th> and <td> alike, so a leading row-header cell
      // keeps the remaining values aligned with their column keys.
      forEach.call(row.cells, function(cell, col) {
        // Cells past the header width fall back to their column index instead
        // of all collapsing onto the single key "undefined".
        var key = keys[col] !== undefined ? keys[col] : String(col);
        record[key] = cellText(cell);
      });
      records.push(record);
    });

    return records;
  }

  // Headline text by document position; index n is paired with table n below.
  var headlineNames = [];
  forEach.call(document.getElementsByClassName('mw-headline'), function(el, i) {
    headlineNames[i] = el.textContent;
  });

  var result = {};
  forEach.call(document.getElementsByClassName('wikitable'), function(table, n) {
    if (n < headlineNames.length) {
      // If two headlines share the same text, the later table wins.
      result[headlineNames[n]] = tableToRecords(table);
    }
  });

  copy(result);
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment