Skip to content

Instantly share code, notes, and snippets.

@TehWardy
Forked from Ashley-Upson/DUscraper.js
Last active September 24, 2017 16:27
Show Gist options
  • Save TehWardy/219f7e41f53d2b60b0a0817892fd18af to your computer and use it in GitHub Desktop.
Save TehWardy/219f7e41f53d2b60b0a0817892fd18af to your computer and use it in GitHub Desktop.
/**
 * Browser-console scraper for the paginated "#all_organizations" grid.
 *
 * The page must expose a jQuery-style `$`. The scraper walks the grid page by
 * page through the "#all_organizations_next a" link, pushes one record per
 * row into `scraper.results`, and finally offers the records as a JSON
 * download.
 *
 * Methods refer to `scraper` by name rather than `this` because several of
 * them are handed to setTimeout as bare function references.
 *
 * Declared with `var` so the assignment is legal in strict mode / modules and
 * the script can be evaluated more than once without a redeclaration error.
 */
var scraper = {
  /** Milliseconds between polls while waiting for the grid to show a new page. */
  delay: 100,
  /** Last row number read from the info label on the most recent poll. */
  lastRowShown: 0,
  /** Total number of rows reported by the info label when run() started. */
  lastRowInSet: 0,
  /** Records collected so far (one object per grid row). */
  results: [],
  /** True once the last page has been processed; triggers the download. */
  complete: false,
  /**
   * Last-row number of the page processed most recently, or null when no page
   * has been processed yet. null (rather than a number) guarantees the first
   * page is never mistaken for an already-processed one.
   */
  prev: null,

  /**
   * Converts a counter token such as "2,345" to an integer.
   * @param {string} token - text of one counter; every comma is stripped.
   * @returns {number} the parsed value, or NaN when the token is not numeric.
   */
  parseCount: function (token) {
    return parseInt(String(token).replace(/,/g, ''), 10);
  },

  /**
   * Reads the "#all_organizations_info" label and extracts its counters.
   * The label is split on single spaces; token 3 is taken as the last row on
   * the current page and token 5 as the total row count.
   * NOTE(review): this matches a label shaped like
   * "Showing 1 to 10 of 2,345 entries" - confirm against the live page.
   * @returns {{lastRowOnPage: number, totalRows: number}} NaN where unreadable.
   */
  readInfo: function () {
    var tokens = $("#all_organizations_info").text().split(' ');
    return {
      lastRowOnPage: scraper.parseCount(tokens[3]),
      totalRows: scraper.parseCount(tokens[5])
    };
  },

  /**
   * Entry point: resets all state, reads the total row count, scrapes every
   * page, and schedules the completion check that triggers the download.
   * @throws {Error} when the total row count cannot be read from the label.
   */
  run: function () {
    console.log("Initialising");
    var totalRows = scraper.readInfo().totalRows;
    if (isNaN(totalRows)) {
      throw new Error("Could not read the total row count from #all_organizations_info");
    }

    // Start from a clean slate so a second run neither appends to the earlier
    // results nor stops early because `complete` is still true.
    scraper.results = [];
    scraper.complete = false;
    scraper.prev = null;
    scraper.lastRowShown = 0;
    scraper.lastRowInSet = totalRows;

    console.log("Fetching " + scraper.lastRowInSet + " orgs from site");
    scraper.scrape();
    setTimeout(scraper.waitForCompletion, 1000);
  },

  /**
   * Processes the page currently shown and advances to the next one until the
   * last row of the set has been seen. When the label still reports the page
   * that was processed last (or cannot be parsed yet), it re-schedules itself
   * after `scraper.delay` ms instead of processing the same rows twice.
   */
  scrape: function () {
    // A loop rather than self-recursion: when clicking "next" updates the grid
    // synchronously, the call stack does not grow with the number of pages.
    for (;;) {
      var lastRowOnPage = scraper.readInfo().lastRowOnPage;
      scraper.lastRowShown = lastRowOnPage;

      if (isNaN(lastRowOnPage) || lastRowOnPage === scraper.prev) {
        // Page not refreshed (or label unreadable) yet: poll again later.
        setTimeout(scraper.scrape, scraper.delay);
        return;
      }

      scraper.processGrid();
      $("#all_organizations_next a").click();
      scraper.prev = lastRowOnPage;

      // Written as !(a < b) so a non-numeric lastRowInSet ends the run
      // instead of looping forever.
      if (!(lastRowOnPage < scraper.lastRowInSet) || scraper.complete) {
        console.log("Processing complete!");
        scraper.complete = true;
        return;
      }
    }
  },

  /**
   * Logs the current page range and records every `tbody > tr[role=row]` row
   * of the "#all_organizations" grid.
   */
  processGrid: function () {
    console.log($("#all_organizations_info").text().replace("Showing", "Scraping orgs"));
    var grid = $("#all_organizations");
    var orgRows = $("tbody > tr[role=row]", grid);
    for (var i = 0; i < orgRows.length; i++) {
      scraper.parseRow($(orgRows[i]));
    }
  },

  /**
   * Builds one record from a grid row and appends it to `scraper.results`.
   * Cells 0, 1, 2 and 4 are read; cell 3 is not used.
   * @param {object} row - jQuery-wrapped table row.
   */
  parseRow: function (row) {
    var cells = $("td", row);
    var href = $("a", $(cells[1])).attr("href");
    var org = {
      Image: $("img", cells[0]).attr("src"),
      OrgName: $(cells[1]).text(),
      // A row without a link yields null instead of a URL ending in "undefined".
      OrgLink: href ? "https://community.dualthegame.com" + href : null,
      CreatedOn: $(cells[2]).text(),
      MemberCount: $(cells[4]).text()
    };
    scraper.results.push(org);
  },

  /**
   * Polls once per second until `scraper.complete` is set, then downloads the
   * collected records as "Orgs.json".
   */
  waitForCompletion: function () {
    if (scraper.complete) {
      scraper.download("Orgs.json");
      return;
    }
    setTimeout(scraper.waitForCompletion, 1000);
  },

  /**
   * Offers `scraper.results` as a pretty-printed JSON file download.
   * @param {string} filename - suggested file name; defaults to "Orgs.json".
   */
  download: function (filename) {
    var json = JSON.stringify(scraper.results, null, 2);
    var url = URL.createObjectURL(new Blob([json], { type: 'text/json' }));
    var a = document.createElement('a');
    a.href = url;
    // Without the download attribute the browser ignores the file name and may
    // simply navigate to the blob instead of saving it.
    a.download = filename || "Orgs.json";
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    // Release the blob URL later; revoking right after click() could cancel a
    // download that has not started yet.
    setTimeout(function () { URL.revokeObjectURL(url); }, 10000);
  }
};
// Start scraping as soon as the script is evaluated.
scraper.run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment