Skip to content

Instantly share code, notes, and snippets.

@vingkan
Created March 27, 2019 21:22
Show Gist options
  • Save vingkan/f3c5546cf09c966754809bff6232cde6 to your computer and use it in GitHub Desktop.
Save vingkan/f3c5546cf09c966754809bff6232cde6 to your computer and use it in GitHub Desktop.
Roster scraper for HawkLink (2019).
/*
* 1. Go to https://iit.campuslabs.com/engage/organization/acm/roster
* 2. Open the browser console
* 3. Copy and paste this script into the console and run
* 4. Keep tab open and do not click anywhere until the script completes
* 5. Save the page output as a .csv
*/
// Read the roster summary text and split it on spaces. The indices used below
// expect an "a-b" range token at [1] and the member total at [3]
// (presumably text like "Showing 1-10 of 200" — confirm against the live page).
// `var` (not `const`) keeps these as re-declarable globals, which matters when
// the script is pasted into the same console session more than once.
var showing = document
  .querySelector('#roster-members')
  .parentElement.children[1].children[0]
  .innerText.split(' ');
var total = parseInt(showing[3]);
var chunk = parseInt(showing[1].split('-')[1]);

// Step 1: expand the roster (polling every 500 ms). Step 2: open each member
// box in turn (polling every 250 ms) and hand the collected rows to printResults.
loadMembersThen(total, chunk, 500, () => {
  console.log('done opening');
  // Positional walk from the main landmark down to the element whose children
  // are the member boxes; tied to the page's markup at the time of writing.
  const container = document.querySelector('[role="main"]')
    .children[0].children[3].children[0].children[1].children[0].children[0];
  getEmailByBoxIndex({
    container,
    results: [],
    boxes: Array.from(container.children),
    index: 0,
    time: 250,
    callback: printResults,
  });
});
/**
 * Writes the scraped roster into the page, one `name,email` row per member,
 * each followed by `<br>`, so the page text can be saved as a .csv file.
 *
 * Fields are quoted CSV-style when they contain a comma, a double quote, or a
 * line break (embedded quotes are doubled), so a value such as "Doe, John"
 * stays in a single column. The finished row is then HTML-escaped so that
 * `&`, `<` and `>` in scraped text show up literally instead of being parsed
 * as markup. Rows without any of those characters are written exactly as
 * `name,email<br>`.
 *
 * @param {Array<{name: string, email: string}>} results - Scraped member records.
 */
function printResults(results) {
  console.log('printing');

  // Quote only when needed so ordinary rows stay unchanged.
  const toCsvField = (value) => {
    const text = String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  // `&` must be replaced first so the entities produced below are not re-escaped.
  const toHtmlText = (text) =>
    text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  results.forEach((res) => {
    const row = [res.name, res.email].map(toCsvField).join(',');
    document.write(`${toHtmlText(row)}<br>`);
  });
}
/**
 * Scrapes one member box per call and re-invokes itself for the next one.
 *
 * Clicks the box at `params.index`, waits (via querySelectorThen) for an
 * element matching `[role="alert"]`, reads the name and email from that
 * element's siblings, clicks the element used to dismiss the view, then
 * increments `params.index` and continues. When no box exists at the current
 * index, `params.callback` is called with the accumulated results.
 *
 * @param {Object} params - Shared scraping state; `index` is mutated in place.
 * @param {Array} params.results - Receives `{ name, email }` records.
 * @param {Array} params.boxes - Member box elements to visit.
 * @param {number} params.index - Position of the box to process on this call.
 * @param {number} params.time - Polling period in ms, passed to querySelectorThen.
 * @param {Function} params.callback - Called with `results` after the last box.
 */
function getEmailByBoxIndex(params) {
  const { results, boxes, index, time } = params;
  const box = boxes[index];

  // Ran past the last box: report and stop.
  if (!box) {
    console.log('Done scraping.');
    params.callback(results);
    return;
  }

  // The clickable target is the box's first grandchild.
  box.children[0].children[0].click();

  querySelectorThen('[role="alert"]', time, (alertBox) => {
    // Name and email are read positionally from the alert's siblings.
    const nameBox = alertBox.parentElement.children[2];
    const emailBox = alertBox.parentElement.children[3];

    if (!nameBox || !emailBox) {
      console.log('Error: No profile found.');
    } else {
      const res = {
        name: nameBox.innerText.trim(),
        email: emailBox.innerText.trim(),
      };
      results.push(res);
      console.log(res);
    }

    // Second child of a fixed-depth ancestor; clicking it is what closes the
    // opened view before the next box is processed (presumably an overlay or
    // close target — confirm against the live page).
    const holder = alertBox.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.children[1];
    // The most recent alert element is also exposed globally
    // (looks like a debugging aid — confirm before removing).
    window.alertBox = alertBox;
    holder.click();

    params.index += 1;
    getEmailByBoxIndex(params);
  });
}
/**
 * Polls the document until at least one element matches a selector, then
 * passes the LAST match to `callback` and stops polling.
 *
 * There is no timeout: polling continues for as long as nothing matches.
 *
 * @param {string} queryString - CSS selector given to document.querySelectorAll.
 * @param {number} time - Polling period in milliseconds.
 * @param {Function} callback - Receives the last matching element.
 */
function querySelectorThen(queryString, time, callback) {
  const timerId = setInterval(() => {
    const matches = document.querySelectorAll(queryString);
    const lastMatch = matches[matches.length - 1];
    if (!lastMatch) {
      return; // nothing yet; try again on the next tick
    }
    // Stop the timer before the callback runs so it cannot fire again.
    clearInterval(timerId);
    callback(lastMatch);
  }, time);
}
/**
 * Repeatedly clicks the "load more members" control until the expected number
 * of members has been requested or the control can no longer be found, then
 * calls `callback` once.
 *
 * @param {number} total - Member count to reach.
 * @param {number} chunk - Amount added to the running count per click.
 * @param {number} time - Polling period in milliseconds.
 * @param {Function} callback - Called with no arguments when loading stops.
 */
function loadMembersThen(total, chunk, time, callback) {
  let loaded = 0;

  // Both exit paths stop the timer first and then notify the caller.
  const finish = () => {
    clearInterval(timerId);
    callback();
  };

  const timerId = setInterval(() => {
    // The control is located by its visible text; the last matching span wins.
    const candidates = [...document.querySelectorAll('span')].filter(
      (span) => span.innerText.toLowerCase() === 'load more members'
    );
    const loader = candidates.pop();

    if (!loader) {
      finish();
      return;
    }

    loader.click();
    document.body.click();
    loaded += chunk;
    console.log(`${loaded} / ${total}`);

    if (loaded >= total) {
      console.log(loaded, total);
      finish();
    }
  }, time);
}
@aubertc
Copy link

aubertc commented Jul 1, 2021

Oh, and for future reference, https://github.com/Supermas123/Engage-Campus-Labs-University-Contact-Information-Webscraper seems quite close to this gist.

@vingkan
Copy link
Author

vingkan commented Jul 8, 2021

Hi @aubertc, I have since graduated and my university switched from Campus Labs to a new system, but if you send me an example https://<school>.campuslabs.com/faculty/trends#/ page, I can check it out!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment