Last active
November 3, 2020 17:50
-
-
Save JasonGross/dfd89b18f934137ffcd8 to your computer and use it in GitHub Desktop.
Scrapers for Splash classes for 2015, written in a combination of Python and JavaScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html> | |
<!--// run `python -m SimpleHTTPServer 8000`, and visit http://localhost:8000/html-scrape-classes.html--> | |
<!--// run `python3 -m http.server 8000`, and visit http://localhost:8000/html-scrape-classes.html--> | |
<head> | |
<script type="text/javascript"> | |
// Follow the instructions at https://developers.google.com/google-apps/calendar/quickstart/js, using this file in place of the quickstart file.
// Run the commented-out JavaScript below in the Chrome console on https://esp.mit.edu/teach/Splash/2015/teacherreg, then paste its output where indicated further down.
/* | |
var forPython = false; | |
var splashYear;// = 2016; | |
var splashMonth;// = 3; | |
var splashFirstDay;// = 12; | |
// http://stackoverflow.com/a/7244288/377022 | |
function ISODateString(d) { | |
function pad(n) { | |
return n < 10 ? '0' + n : n | |
} | |
return d.getUTCFullYear() + '-' + pad(d.getUTCMonth() + 1) + '-' + pad(d.getUTCDate()) + 'T' + pad(d.getUTCHours()) + ':' + pad(d.getUTCMinutes()) + ':' + pad(d.getUTCSeconds()) + 'Z' | |
} | |
// http://stackoverflow.com/a/13566675/377022 | |
function getMonthFromString(mon){ | |
return new Date(Date.parse(mon +" 1, 2012")).getMonth()+1 | |
} | |
function getHTMLAndTextOfURL(url, cb) { | |
var ifr = document.createElement("iframe"); | |
ifr.src = url; | |
ifr.style = "display:none;"; | |
ifr.onload = function () { | |
var html = ifr.contentWindow.document.body.innerHTML; | |
var text = ifr.contentWindow.document.body.innerText; | |
var doc = ifr.contentWindow.document; | |
document.body.removeChild(ifr); | |
cb(html, text, doc); | |
} | |
document.body.appendChild(ifr); | |
} | |
function makeEventObject(className, room, time, description) { | |
var timeSplitReg = /([A-Za-z]*)\s*([0-9]*):([0-9]*)([ap]m)[-\s]*([0-9]*):([0-9]*)([ap]m)/; | |
if (time.match(timeSplitReg) !== null) { | |
var timeParts = time.match(timeSplitReg); | |
var day = splashFirstDay; | |
if (timeParts[1] == 'Sun') { | |
day += 1; | |
} else if (timeParts[1] != 'Sat') { | |
console.log('Warning: Unrecognized Day: ' + timeParts[1] + ' for ' + className); | |
} | |
var startHours = parseInt(timeParts[2]); | |
var startMinutes = parseInt(timeParts[3]); | |
var endHours = parseInt(timeParts[5]); | |
var endMinutes = parseInt(timeParts[6]); | |
if (timeParts[4] == 'pm') { | |
if (startHours < 12) startHours += 12; | |
} else if (timeParts[4] != 'am') { | |
console.log('Warning: Unrecognized Time Modifier: ' + timeParts[4] + ' for ' + className); | |
} | |
if (timeParts[7] == 'pm') { | |
if (endHours < 12) endHours += 12; | |
} else if (timeParts[7] != 'am') { | |
console.log('Warning: Unrecognized Time Modifier: ' + timeParts[7] + ' for ' + className); | |
} | |
var start = new Date(splashYear, splashMonth - 1, day, startHours, startMinutes, 0, 0); | |
var end = new Date(splashYear, splashMonth - 1, day, endHours, endMinutes, 0, 0); | |
return { | |
'kind': 'calendar#event', | |
'location': room, | |
'summary': className, | |
'description': description, | |
'end': { | |
'dateTime': ISODateString(end) | |
}, | |
'start': { | |
'dateTime': ISODateString(start) | |
} | |
}; | |
} else { | |
console.log('Warning: Unrecognized Time: ' + time + ' for ' + className); | |
return [className, room, time]; | |
} | |
} | |
function makeLink(event) { | |
return 'https://www.google.com/calendar/event?action=TEMPLATE&dates=' + | |
encodeURIComponent(event['start']['dateTime'].replace(/[:-]/g, '') + '/' + event['end']['dateTime'].replace(/[:-]/g, '')) + | |
'&text=' + encodeURIComponent(event['summary']) + | |
'&location=' + encodeURIComponent(event['location']) + | |
'&details=' + encodeURIComponent(event['description']); | |
} | |
function makeButton(event) { | |
var url = makeLink(event); | |
var btn = document.createElement("a"); | |
btn.className = "abutton"; | |
btn.href = url; | |
btn.innerText = "Add to Google Calendar"; | |
return btn; | |
} | |
function addCalendarLink(event, row) { | |
var btn = makeButton(event); | |
var cell = row.getElementsByClassName('clsright')[0]; | |
var existing = cell.getElementsByTagName('a'); | |
for (var i = 0; i < existing.length; i++) { | |
if (existing[i].innerText == btn.innerText) { | |
cell.removeChild(existing[i]); | |
} | |
} | |
cell.appendChild(btn); | |
} | |
function getInnerText(elem, defaultv) { | |
if (elem !== undefined) { | |
return elem.innerText; | |
} | |
return defaultv; | |
} | |
function doMain() { | |
var roomReg = /Room:\s*((?:Zoom Room ?)[0-9-]+)/; | |
var timeReg = /Time:\s*([A-Za-z\s]*[0-9:\s]*[ap]m[\s-]*[0-9:\s]*[ap]m)/; | |
var rows = document.getElementsByTagName('tr'); | |
var classes = []; | |
var className = null; | |
var longClassName = null; | |
var classDescription = null; | |
function doNext(i) { | |
if (i < rows.length) { | |
var names = rows[i].getElementsByClassName('classname'); | |
if (names.length == 1) { | |
className = names[0].innerText.replace('»', ''); | |
var fragment = document.createElement('div'); | |
fragment.innerHTML = names[0].innerHTML; | |
console.log(className); | |
getHTMLAndTextOfURL(fragment.getElementsByTagName('a')[0].href, function (html, text, doc) { | |
classDescription = getInnerText(doc.getElementsByClassName('class_content')[0], "").replace(/^\s+/g,'').replace(/\s+$/g,'').replace(/\n +/g, ''); | |
longClassName = getInnerText(doc.getElementsByClassName('class_title')[0], className).replace(/^\s+/g,'').replace(/\s+$/g,''); | |
console.log(longClassName); | |
doNext(i + 1); | |
}); | |
} else if (rows[i].innerText.match(roomReg) !== null && className !== null) { | |
var theRoom = rows[i].innerText.match(roomReg)[1]; | |
var theTime = rows[i].innerText.match(timeReg)[1]; | |
// console.log(theRoom); | |
// console.log(theTime); | |
var event = makeEventObject(longClassName, theRoom, theTime, classDescription); | |
classes[classes.length] = event; | |
addCalendarLink(event, rows[i]); | |
doNext(i + 1); | |
} else { | |
doNext(i + 1); | |
} | |
} else { | |
var result = JSON.stringify(classes); | |
if (forPython) { | |
result = result.replace(/":"/g, '":u"'); | |
} | |
console.log('CLASSES = ' + result); | |
} | |
}; | |
doNext(0); | |
} | |
function main0() { | |
var datesReg = /Dates: [A-Za-z ]*([0-9]+) will be on Saturday, ([A-Za-z]+) ([0-9]+)/; | |
var datesReg2 = /[A-Za-z]+ ([0-9]+) will run (Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(?:Nov|Dec)(?:ember)?) ([0-9]+)/; | |
getHTMLAndTextOfURL(window.location.href.replace(/[0-9]+\/teacherreg/, "moreinfo.html"), function handleInfo(html, text) { | |
var datesMatch = text.match(datesReg); | |
if (datesMatch === null) datesMatch = text.match(datesReg2); | |
console.log(datesMatch); | |
splashYear = parseInt(datesMatch[1]); | |
splashMonth = getMonthFromString(datesMatch[2]); | |
splashFirstDay = parseInt(datesMatch[3]); | |
doMain(); | |
}); | |
} | |
main0(); | |
*/ | |
// paste output here: | |
// Calendar events to create, one per class section. Each entry is a Google
// Calendar event resource with UTC start/end timestamps.
var CLASSES = [
  {
    "kind": "calendar#event",
    "location": "Zoom Room 14",
    "summary": "14173: Proving Löb's Theorem",
    "description": "",
    "end": { "dateTime": "2020-11-15T16:55:00Z" },
    "start": { "dateTime": "2020-11-15T16:05:00Z" }
  },
  {
    "kind": "calendar#event",
    "location": "Zoom Room 18",
    "summary": "14172: Linear Logic",
    "description": "",
    "end": { "dateTime": "2020-11-15T18:55:00Z" },
    "start": { "dateTime": "2020-11-15T18:05:00Z" }
  },
  {
    "kind": "calendar#event",
    "location": "Zoom Room 18",
    "summary": "14170: Circling - Intersubjective Mindfulness Meditation",
    "description": "",
    "end": { "dateTime": "2020-11-14T20:55:00Z" },
    "start": { "dateTime": "2020-11-14T19:05:00Z" }
  },
  {
    "kind": "calendar#event",
    "location": "Zoom Room 18",
    "summary": "14169: Making deep friendships - Circling",
    "description": "",
    "end": { "dateTime": "2020-11-14T22:55:00Z" },
    "start": { "dateTime": "2020-11-14T21:05:00Z" }
  }
];
// OAuth client ID sent with every authorization request. Yours is listed
// under your project in the Google Developer Console,
// https://console.developers.google.com
var CLIENT_ID = "683712212268-1il3ta3ubsnutnet6ig8m6ngj0j9mb1f.apps.googleusercontent.com";
// OAuth scopes requested when authorizing (Google Calendar access).
var SCOPES = [
  "https://www.googleapis.com/auth/calendar"
];
/**
 * Ask the authorization server, without prompting (`immediate: true`),
 * whether the current user has already authorized this application.
 * The outcome is delivered to handleAuthResult.
 */
function checkAuth() {
  const silentRequest = {
    client_id: CLIENT_ID,
    scope: SCOPES.join(' '),
    immediate: true,
  };
  gapi.auth.authorize(silentRequest, handleAuthResult);
}
/**
 * React to the authorization server's answer.
 *
 * On success the authorize panel is hidden and the Calendar client library
 * is loaded; otherwise the panel is shown so the user can click Authorize.
 *
 * @param {Object} authResult Authorization result; treated as a failure
 *     when falsy or when it carries an `error` property.
 */
function handleAuthResult(authResult) {
  const panel = document.getElementById('authorize-div');
  const authorized = Boolean(authResult) && !authResult.error;
  if (!authorized) {
    // Let the user start the interactive flow via the Authorize button.
    panel.style.display = 'inline';
    return;
  }
  panel.style.display = 'none';
  loadCalendarApi();
}
/**
 * Start the interactive (`immediate: false`) authorization flow when the
 * user clicks the Authorize button. The outcome is delivered to
 * handleAuthResult.
 *
 * @param {Event} event Button click event (not used).
 * @returns {boolean} Always false.
 */
function handleAuthClick(event) {
  const interactiveRequest = {
    client_id: CLIENT_ID,
    scope: SCOPES,
    immediate: false,
  };
  gapi.auth.authorize(interactiveRequest, handleAuthResult);
  return false;
}
/**
 * Load the Google Calendar v3 client library, then run addClasses once it
 * is available.
 */
function loadCalendarApi() {
  const api = { name: 'calendar', version: 'v3' };
  gapi.client.load(api.name, api.version, addClasses);
}
/**
 * Append one line of text to the element with id "output".
 *
 * @param {string} message Text to show; a newline is added after it.
 */
function appendPre(message) {
  const line = document.createTextNode(message + '\n');
  document.getElementById('output').appendChild(line);
}
/**
 * Add CLASSES[i] to the user's primary calendar unless an event that looks
 * like the same class is already there.
 *
 * Existing events overlapping the class's time window are listed first; one
 * whose summary shares the class's first six characters (the class number
 * prefix, e.g. "14173:") counts as a match and suppresses the insert.
 * Progress and failures are reported through appendPre.
 *
 * @param {number} i Index into CLASSES.
 */
function addClass(i) {
  const classEvent = CLASSES[i];
  const idPrefix = classEvent.summary.substring(0, 6);

  const lookup = gapi.client.calendar.events.list({
    calendarId: 'primary',
    timeMin: classEvent.start.dateTime,
    timeMax: classEvent.end.dateTime,
    maxResults: 10,
    singleEvents: true,
    orderBy: 'startTime',
  });

  lookup.execute(function (resp) {
    // A failed lookup must not fall through to an insert: we could not
    // tell whether the class is already on the calendar.
    if (!resp || resp.error) {
      appendPre('Could not check for existing events for ' + classEvent.summary + '; not adding event');
      return;
    }

    // Untitled calendar events have no `summary`; they can never match.
    const existingEvent = (resp.items || []).find(function (candidate) {
      return typeof candidate.summary === 'string' &&
        candidate.summary.substring(0, 6) === idPrefix;
    });
    if (existingEvent !== undefined) {
      appendPre('Found existing event ' + existingEvent.summary + ' which seems to match event ' + classEvent.summary + '; not adding event');
      return;
    }

    gapi.client.calendar.events.insert({
      calendarId: 'primary',
      resource: classEvent,
    }).execute(function (event) {
      if (!event || event.error) {
        appendPre('Failed to create event for ' + classEvent.summary);
        return;
      }
      appendPre('Event created for ' + event.summary + ': ' + event.htmlLink);
    });
  });
}
/**
 * Run addClass for every index of CLASSES, in order.
 */
function addClasses() {
  for (const index of CLASSES.keys()) {
    addClass(index);
  }
}
</script> | |
<script src="https://apis.google.com/js/client.js?onload=checkAuth"> | |
</script> | |
</head> | |
<body> | |
<div id="authorize-div" style="display: none"> | |
<span>Authorize access to Google Calendar API</span> | |
<!--Button for the user to click to initiate auth sequence --> | |
<button id="authorize-button" onclick="handleAuthClick(event)"> | |
Authorize | |
</button> | |
</div> | |
<pre id="output"></pre> | |
</body> | |
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// When true, the printed CLASSES literal has every `":"` rewritten to `":u"`
// so that string values become u"..." literals.
var forPython = false;
// Date of the first day of the program. These are filled in by main0() from
// the program's "moreinfo" page before any class is scraped.
var splashYear; // e.g. 2016
var splashMonth; // 1-based month number, e.g. 3
var splashFirstDay; // day of the month of the Saturday, e.g. 12
// Adapted from http://stackoverflow.com/a/7244288/377022
/**
 * Format a Date as a UTC timestamp with one-second precision, for example
 * "2020-11-15T16:05:00Z" (no fractional seconds).
 *
 * @param {Date} d Date to format.
 * @returns {string} "YYYY-MM-DDTHH:MM:SSZ" built from the date's UTC fields.
 */
function ISODateString(d) {
  const twoDigits = (value) => String(value).padStart(2, '0');
  const datePart = [
    d.getUTCFullYear(),
    twoDigits(d.getUTCMonth() + 1),
    twoDigits(d.getUTCDate()),
  ].join('-');
  const timePart = [d.getUTCHours(), d.getUTCMinutes(), d.getUTCSeconds()]
    .map((value) => twoDigits(value))
    .join(':');
  return `${datePart}T${timePart}Z`;
}
// Replaces the Date.parse trick from http://stackoverflow.com/a/13566675/377022:
// parsing strings like "Nov 1, 2012" is implementation-defined, so the month
// is looked up explicitly instead.
/**
 * Convert an English month name or abbreviation to its 1-based number.
 *
 * Only the first three letters are significant and case is ignored, so
 * "Nov", "November" and "november" all yield 11.
 *
 * @param {string} mon Month name, e.g. "Mar" or "March".
 * @returns {number} 1 for January through 12 for December, or NaN when the
 *     text is not a month name.
 */
function getMonthFromString(mon) {
  const MONTH_PREFIXES = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
  ];
  const prefix = String(mon).trim().slice(0, 3).toLowerCase();
  const index = MONTH_PREFIXES.indexOf(prefix);
  return index === -1 ? NaN : index + 1;
}
/**
 * Load a URL in a hidden iframe and pass the loaded page to a callback.
 *
 * The iframe is appended to the current document's body and removed again
 * as soon as it has loaded, just before the callback runs. The frame's
 * document is read directly, so the URL has to be one the current page is
 * allowed to inspect.
 *
 * @param {string} url Page to load.
 * @param {function(string, string, Document)} cb Called with the loaded
 *     body's innerHTML, the body's innerText, and the frame's document.
 */
function getHTMLAndTextOfURL(url, cb) {
  const frame = document.createElement('iframe');
  frame.onload = () => {
    const loadedDoc = frame.contentWindow.document;
    const { innerHTML, innerText } = loadedDoc.body;
    document.body.removeChild(frame);
    cb(innerHTML, innerText, loadedDoc);
  };
  frame.style = 'display:none;';
  frame.src = url;
  // Nothing loads until the frame is attached to the page.
  document.body.appendChild(frame);
}
/**
 * Build a Google Calendar event resource for one class section.
 *
 * The calendar date comes from the file-level splashYear / splashMonth /
 * splashFirstDay values: a day name starting with "Sat" maps to
 * splashFirstDay and one starting with "Sun" to the day after. Clock times
 * are interpreted in the local time zone of the machine running the script
 * and converted to UTC strings by ISODateString.
 *
 * @param {string} className Used as the event summary.
 * @param {string} room Used as the event location.
 * @param {string} time Text such as "Sat 1:05pm--1:55pm".
 * @param {string} description Used as the event description.
 * @returns {Object|Array} A `calendar#event` object, or the array
 *     [className, room, time] (after logging a warning) when `time` cannot
 *     be parsed.
 */
function makeEventObject(className, room, time, description) {
  const timeSplitReg = /([A-Za-z]*)\s*([0-9]*):([0-9]*)([ap]m)[-\s]*([0-9]*):([0-9]*)([ap]m)/;
  const timeParts = time.match(timeSplitReg);
  if (timeParts === null) {
    console.log('Warning: Unrecognized Time: ' + time + ' for ' + className);
    return [className, room, time];
  }

  // Convert a 12-hour clock reading to 0-23. The regex only admits "am" or
  // "pm", so no other modifier can reach this point. 12am is midnight (0)
  // and 12pm is noon (12).
  function to24Hour(hourText, meridiem) {
    const hours = Number.parseInt(hourText, 10);
    if (meridiem === 'pm') {
      return hours < 12 ? hours + 12 : hours;
    }
    return hours === 12 ? 0 : hours;
  }

  // Accept "Sat"/"Saturday"/"Sun"/"Sunday" in any letter case.
  const dayPrefix = timeParts[1].slice(0, 3).toLowerCase();
  let day = splashFirstDay;
  if (dayPrefix === 'sun') {
    day += 1;
  } else if (dayPrefix !== 'sat') {
    console.log('Warning: Unrecognized Day: ' + timeParts[1] + ' for ' + className);
  }

  const startHours = to24Hour(timeParts[2], timeParts[4]);
  const startMinutes = Number.parseInt(timeParts[3], 10);
  const endHours = to24Hour(timeParts[5], timeParts[7]);
  const endMinutes = Number.parseInt(timeParts[6], 10);

  const start = new Date(splashYear, splashMonth - 1, day, startHours, startMinutes, 0, 0);
  let end = new Date(splashYear, splashMonth - 1, day, endHours, endMinutes, 0, 0);
  if (end < start) {
    // The section runs past midnight (e.g. 11:05pm--12:30am), so it ends on
    // the following day. Date normalizes a day number past month end.
    end = new Date(splashYear, splashMonth - 1, day + 1, endHours, endMinutes, 0, 0);
  }

  return {
    'kind': 'calendar#event',
    'location': room,
    'summary': className,
    'description': description,
    'end': {
      'dateTime': ISODateString(end)
    },
    'start': {
      'dateTime': ISODateString(start)
    }
  };
}
/**
 * Build a Google Calendar "create event" template URL for an event.
 *
 * The start and end timestamps have their ":" and "-" characters removed
 * and are joined with "/" to form the `dates` parameter; every parameter
 * value is percent-encoded.
 *
 * @param {Object} event Event with `start.dateTime`, `end.dateTime`,
 *     `summary`, `location` and `description`.
 * @returns {string} The template URL.
 */
function makeLink(event) {
  const compact = (stamp) => stamp.replace(/[:-]/g, '');
  const dates = `${compact(event.start.dateTime)}/${compact(event.end.dateTime)}`;
  const query = [
    ['dates', dates],
    ['text', event.summary],
    ['location', event.location],
    ['details', event.description],
  ]
    .map(([key, value]) => `&${key}=${encodeURIComponent(value)}`)
    .join('');
  return `https://www.google.com/calendar/event?action=TEMPLATE${query}`;
}
/**
 * Create an "Add to Google Calendar" link element for an event.
 *
 * @param {Object} event Event passed on to makeLink to obtain the URL.
 * @returns {HTMLAnchorElement} A detached <a class="abutton"> element.
 */
function makeButton(event) {
  return Object.assign(document.createElement('a'), {
    className: 'abutton',
    href: makeLink(event),
    innerText: 'Add to Google Calendar',
  });
}
/**
 * Put an "Add to Google Calendar" link into a table row's `clsright` cell,
 * replacing any such links left there by an earlier run.
 *
 * @param {Object} event Event to link to (passed to makeButton).
 * @param {Element} row Row containing an element with class "clsright".
 *     When there is none, a warning is logged and nothing is added.
 */
function addCalendarLink(event, row) {
  const cell = row.getElementsByClassName('clsright')[0];
  if (cell === undefined) {
    console.log('Warning: No clsright cell found; not adding calendar link');
    return;
  }
  const btn = makeButton(event);

  // getElementsByTagName returns a live collection that shrinks as nodes
  // are removed, so take a snapshot before deleting anything.
  const staleLinks = Array.from(cell.getElementsByTagName('a')).filter(function (link) {
    return link.innerText === btn.innerText;
  });
  staleLinks.forEach(function (link) {
    // The link may be nested deeper than a direct child of the cell.
    link.parentNode.removeChild(link);
  });

  cell.appendChild(btn);
}
/**
 * Read an element's innerText, falling back to a default when the element
 * is missing.
 *
 * @param {Element|undefined} elem Element to read; only `undefined` counts
 *     as missing.
 * @param {string} defaultv Value returned when `elem` is undefined.
 * @returns {string} `elem.innerText`, or `defaultv`.
 */
function getInnerText(elem, defaultv) {
  return elem === undefined ? defaultv : elem.innerText;
}
/**
 * Walk every table row of the current page, build a calendar event for each
 * class section found, add an "Add to Google Calendar" link to its row, and
 * finally log all events as a `CLASSES = [...]` literal.
 *
 * Rows are handled in document order. A row containing exactly one
 * `classname` element starts a new class: the page behind its link is
 * loaded to obtain the full title and description. Later rows whose text
 * matches "Room: ..." are sections of that class.
 *
 * Sections whose time cannot be parsed are still recorded (as the array
 * returned by makeEventObject) but get no calendar link. Rows with a room
 * but no recognizable "Time:" text are skipped with a warning.
 */
function doMain() {
  const roomReg = /Room:\s*((?:Zoom Room ?)[0-9-]+)/;
  const timeReg = /Time:\s*([A-Za-z\s]*[0-9:\s]*[ap]m[\s-]*[0-9:\s]*[ap]m)/;
  const rows = document.getElementsByTagName('tr');
  const classes = [];
  // State carried from the most recent class-title row to its section rows.
  let className = null;
  let longClassName = null;
  let classDescription = null;

  // Log the collected events in a form that can be pasted back into code.
  function printResult() {
    let result = JSON.stringify(classes);
    if (forPython) {
      result = result.replace(/":"/g, '":u"');
    }
    console.log('CLASSES = ' + result);
  }

  // Record one section row of the current class.
  function addSection(row, roomMatch) {
    const timeMatch = row.innerText.match(timeReg);
    if (timeMatch === null) {
      console.log('Warning: No Time found in ' + roomMatch[1] + ' for ' + longClassName);
      return;
    }
    const event = makeEventObject(longClassName, roomMatch[1], timeMatch[1], classDescription);
    classes.push(event);
    // makeEventObject returns an array when it cannot parse the time; there
    // is nothing to link to in that case.
    if (!Array.isArray(event)) {
      addCalendarLink(event, row);
    }
  }

  // Process rows from `start` onward. Returns early while a class page is
  // loading; the load callback resumes with the following row.
  function doNext(start) {
    for (let i = start; i < rows.length; i++) {
      const row = rows[i];
      const names = row.getElementsByClassName('classname');
      if (names.length === 1) {
        className = names[0].innerText.replace('»', '');
        console.log(className);
        const link = names[0].getElementsByTagName('a')[0];
        if (link === undefined) {
          // No class page to consult: fall back to the short name.
          classDescription = '';
          longClassName = className.trim();
          console.log(longClassName);
          continue;
        }
        getHTMLAndTextOfURL(link.href, function (html, text, doc) {
          classDescription = getInnerText(doc.getElementsByClassName('class_content')[0], '').trim().replace(/\n +/g, '');
          longClassName = getInnerText(doc.getElementsByClassName('class_title')[0], className).trim();
          console.log(longClassName);
          doNext(i + 1);
        });
        return;
      }
      // Room rows seen before any class title have nothing to attach to.
      const roomMatch = className === null ? null : row.innerText.match(roomReg);
      if (roomMatch !== null) {
        addSection(row, roomMatch);
      }
    }
    printResult();
  }

  doNext(0);
}
/**
 * Entry point: find the program's dates, then scrape the classes.
 *
 * The "moreinfo.html" page next to the current teacherreg URL is loaded and
 * searched for the year, month name and day of the program's first day.
 * On success the file-level splashYear, splashMonth and splashFirstDay are
 * set and doMain runs. When the dates cannot be found or understood, a
 * warning is logged and nothing else happens.
 */
function main0() {
  const datesReg = /Dates: [A-Za-z ]*([0-9]+) will be on Saturday, ([A-Za-z]+) ([0-9]+)/;
  const datesReg2 = /[A-Za-z]+ ([0-9]+) will run (Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(?:Nov|Dec)(?:ember)?) ([0-9]+)/;
  const infoURL = window.location.href.replace(/[0-9]+\/teacherreg/, "moreinfo.html");
  getHTMLAndTextOfURL(infoURL, function handleInfo(html, text) {
    // Try the older page wording first, then the alternative one.
    const datesMatch = text.match(datesReg) || text.match(datesReg2);
    console.log(datesMatch);
    if (datesMatch === null) {
      console.log('Warning: Could not find the program dates on ' + infoURL + '; not scraping classes');
      return;
    }
    const month = getMonthFromString(datesMatch[2]);
    if (Number.isNaN(month)) {
      console.log('Warning: Unrecognized Month: ' + datesMatch[2] + '; not scraping classes');
      return;
    }
    splashYear = Number.parseInt(datesMatch[1], 10);
    splashMonth = month;
    splashFirstDay = Number.parseInt(datesMatch[3], 10);
    doMain();
  });
}
// Start scraping as soon as the script is run.
main0();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment