Skip to content

Instantly share code, notes, and snippets.

@quadney
Forked from joshblack/courseParser.js
Created May 5, 2014 18:28
Show Gist options
  • Save quadney/84e0c0261c81b240babc to your computer and use it in GitHub Desktop.
Save quadney/84e0c0261c81b240babc to your computer and use it in GitHub Desktop.
// Dependencies: the Crawler constructor from the "crawler" package and
// graceful-fs, which is used below to append results to text files.
var Crawler = require("crawler").Crawler,
fs = require('graceful-fs'),
// Lookup tables loaded from local JSON files. The crawl callback reads the
// `data` array of each one to map scraped codes to ids.
courseDictionary = require('./courseDictionary.json'),
buildingDictionary = require('./buildingDictionary.json'),
// Running counter used in the "meeting #N" log message; starts at 1.
i = 1,
// Incremented once per crawl callback; not read anywhere in the visible code.
request = 0;
/**
 * Returns the first element of `list` accepted by `predicate`, or undefined
 * when nothing matches. A plain loop is used because Array#forEach ignores
 * the callback's return value and therefore cannot be stopped early.
 *
 * @param {Array} list - Elements to search, in order.
 * @param {function(*): boolean} predicate - Test applied to each element.
 * @returns {*} The first matching element, or undefined.
 */
function findFirst(list, predicate) {
    for (var idx = 0; idx < list.length; idx++) {
        if (predicate(list[idx])) {
            return list[idx];
        }
    }
    return undefined;
}

/**
 * Appends `rows` to `file` with a single write, each row followed by ",\n",
 * then calls `onRowAppended(row)` once per row. Issuing one append per page
 * (instead of one un-awaited append per row) keeps the rows in order inside
 * the file. Does nothing when `rows` is empty.
 *
 * @param {string} file - Path of the file to append to.
 * @param {string[]} rows - Already formatted rows.
 * @param {function(string)} onRowAppended - Called for each row after the write succeeds.
 * @throws Rethrows the fs error from inside the write callback if the append fails.
 */
function appendRows(file, rows, onRowAppended) {
    if (rows.length === 0) {
        return;
    }
    fs.appendFile(file, rows.join(',\n') + ',\n', function(err) {
        if (err) throw err;
        rows.forEach(function(row) {
            onRowAppended(row);
        });
    });
}

var c = new Crawler({
    // Set to one so we don't run into concurrency issues when it comes to
    // writing to our result files
    "maxConnections": 1,

    /**
     * Called for each crawled page. Walks every row of the page's
     * `center table`, turns rows with valid meeting days into meeting
     * records, and appends the formatted records to the result files.
     *
     * @param {*} error - Truthy when the page could not be crawled.
     * @param {*} result - Crawler result object (unused here).
     * @param {Function} $ - Query function for the crawled page.
     */
    "callback": function(error, result, $) {
        var rowCourseData = [],
            rowMeetingData = [];

        request++;

        // Without a parsed page there is nothing to scrape; skip it instead of
        // crashing on the first `$(...)` call.
        if (error || typeof $ !== 'function') {
            console.error('Skipping page: ' + (error || 'no parsed document available'));
            return;
        }

        $('center table tr').each(function(rowIndex, row) {
            var course, courseCode, courseNum,
                sectionNum,
                creditsNum,
                meetingDays,
                periodNum,
                buildingCode,
                roomNum,
                courseTitle,
                instructors,
                anchorHtml,
                previousMeeting,
                matchedCourse,
                matchedBuilding;

            $(row).children().each(function(cellIndex, cell) {
                // Help columns and header cells carry no data
                if ($(cell).hasClass('colhelp') || $(cell).prop('tagName') === "TH") {
                    return;
                }

                // Grab the data values we need for our row data
                switch (cellIndex) {
                    case 0: // Grab the course code
                        anchorHtml = $(cell).children('a').html();
                        if (anchorHtml != null) {
                            // Grab course and strip all white space
                            course = anchorHtml.replace(/\s+/g, '');
                            courseCode = course.substring(0, 3);
                            courseNum = course.substring(3);
                        }
                        else {
                            course = undefined;
                        }
                        break;
                    case 5: // Grab the section number
                        sectionNum = $.trim($(cell).children('b').html());
                        break;
                    case 6: // Grab the amount of credits
                        creditsNum = $.trim($(cell).html());
                        break;
                    case 7: // Grab the meeting days
                        meetingDays = $.trim($(cell).html()).split(' '); // separate each day
                        // A single invalid entry ('TBA' or empty) invalidates the whole
                        // list, so the row can't be added to our database
                        if (meetingDays.some(function(day) { return day === 'TBA' || day === ''; })) {
                            meetingDays = undefined;
                        }
                        break;
                    case 8: // Grab the period
                        periodNum = $(cell).html();
                        break;
                    case 9: // Grab the building code
                        buildingCode = $(cell).html();
                        break;
                    case 10: // Grab the room number
                        roomNum = $(cell).html();
                        break;
                    case 12: // Grab the course title
                        courseTitle = $.trim($(cell).children('a').html());
                        break;
                    case 13: // Grab the instructor names
                        instructors = $.trim($(cell).html().replace(/\n/g, '').replace(/<br[^>]*>/gi, ' and '));
                        break;
                    default:
                        break;
                }
            });

            // If our course code is undefined and we have valid meeting days we need to
            // pull the course id and building id from the last entry in our array
            if (courseCode === undefined && meetingDays !== undefined) {
                // NOTE(review): the last entry may belong to an earlier section when the
                // row directly above produced no meeting, and this row's own building
                // code is ignored - confirm both are intended.
                previousMeeting = rowMeetingData[rowMeetingData.length - 1];
                if (previousMeeting !== undefined) {
                    meetingDays.forEach(function(meetingDay) {
                        rowMeetingData.push([
                            previousMeeting[0],
                            previousMeeting[1],
                            roomNum,
                            meetingDay,
                            periodNum
                        ]);
                    });
                }
            }
            else {
                // add a new entry into the row course data
                // rowCourseData.push([courseCode, courseNum, sectionNum, creditsNum, instructors, courseTitle, 'spring', '2014']);

                // Throw out cases where meetingDays is undefined, we don't want those
                if (meetingDays !== undefined) {
                    // The lookups do not depend on the meeting day, so resolve them once
                    // per row and stop at the first match in each dictionary.
                    matchedCourse = findFirst(courseDictionary.data, function(courseData) {
                        return courseData.deptCode === courseCode &&
                            courseData.courseNumber === courseNum &&
                            courseData.sectionNumber === sectionNum;
                    });
                    if (matchedCourse !== undefined) {
                        matchedBuilding = findFirst(buildingDictionary.data, function(buildingData) {
                            return buildingData.buildingCode === buildingCode;
                        });
                    }
                    if (matchedBuilding !== undefined) {
                        // We found everything we need! Make one meeting per day
                        meetingDays.forEach(function(meetingDay) {
                            rowMeetingData.push([matchedCourse.id, matchedBuilding.id, roomNum, meetingDay, periodNum]);
                        });
                    }
                }
            }
        });

        // Format the collected rows as PHP-style array literals
        var finalCourseData = rowCourseData.map(function(data) {
            return "['deptCode' => '" + data[0] +
                "', 'courseNumber' => '" + data[1] +
                "', 'sectionNumber' => '" + data[2] +
                "', 'credits' => '" + data[3] +
                "', 'instructor' => '" + data[4] +
                "', 'courseTitle' => '" + data[5] +
                "', 'semester' => '" + data[6] +
                "', 'year' => " + data[7] + ']';
        });
        var finalMeetingData = rowMeetingData.map(function(data) {
            return "['course_id' => '" + data[0] +
                "', 'building_id' => '" + data[1] +
                "', 'roomNumber' => '" + data[2] +
                "', 'meetingDay' => '" + data[3] +
                "', 'period' => '" + data[4] + '\']';
        });

        appendRows('results.txt', finalCourseData, function(data) {
            console.log('The data for ' + data.substring(16, 19) + data.substring(41, 45) + ' was appended to the file!');
        });
        appendRows('meetingsData.txt', finalMeetingData, function() {
            console.log('The data for meeting #' + i + ' was appended to the file!');
            i++;
        });
    }
});
// Queue one URL per page slug; every page lives under the same 201401 base
// path and ends in ".htm", so only the slug differs between entries.
var SOC_BASE_URL = 'http://registrar.ufl.edu/soc/201401/all/',
    socPageSlugs = [
        'accounts',
        'advertis',
        'aframstu',
        'afrstudi',
        'agribioe',
        'agriedco',
        'agriopma',
        'agriture',
        'agronomy',
        'animalsc',
        'anthropo',
        'applphys',
        'architec',
        'arthisto',
        'astronom',
        'bibozobi',
        'bibozobo',
        'bibozozo',
        'biomedeg',
        'biostati',
        'business',
        'chemical',
        'chemistr',
        'civcseng',
        'classicc',
        'classicg',
        'classicl',
        'clinicap',
        'computer',
        'construc',
        'denodiag',
        'desconpl',
        'digworld',
        'economic',
        'educahdo',
        'educasep',
        'educattl',
        'electric',
        'engingen',
        'englishs',
        'entomolo',
        'envglohe',
        'environm',
        'envrhort',
        'epidemio',
        'european',
        'famscien',
        'finances',
        'finearts',
        'firstyrf',
        'fishsfrc',
        'flexlear',
        'foodreso',
        'foodscie',
        'forresco',
        'geograph',
        'geomatic',
        'geoscien',
        'healthed',
        'healthop',
        'healthpr',
        'healthsa',
        'historys',
        'honorspr',
        'horticul',
        'industri',
        'informat',
        'innovati',
        'interdis',
        'interior',
        'jewishst',
        'journali',
        'landscap',
        'langaaaa',
        'langakan',
        'langamha',
        'langarab',
        'langchin',
        'langczec',
        'langdutc',
        'langfren',
        'langgerm',
        'langhait',
        'langhebr',
        'langital',
        'langjapa',
        'langpoli',
        'langruss',
        'langswah',
        'langviet',
        'langwolo',
        'langyoru',
        'latiname',
        'lawschoo',
        'lawtaxat',
        'linguist',
        'manageme',
        'marketin',
        'masscomm',
        'material',
        'mathemat',
        'mechaero',
        'mediaaaa',
        'medianat',
        'medianes',
        'medibioc',
        'medicomm',
        'mediemrg',
        'medigene',
        'medimole',
        'medineur',
        'medineus',
        'mediobst',
        'mediopht',
        'mediortr',
        'mediotol',
        'medipath',
        'medipedi',
        'mediphas',
        'mediphys',
        'medipsyc',
        'mediradi',
        'mediraon',
        'medisurg',
        'medivals',
        'microbio',
        'miliafor',
        'miliarmy',
        'milinavy',
        'musicapp',
        'natresou',
        'nuclearr',
        'nursinga',
        'nursingh',
        'nursingw',
        'occupati',
        'packagsc',
        'pestmana',
        'pharcchm',
        'pharceop',
        'pharcets',
        'phardyna',
        'pharprac',
        'philosop',
        'physical',
        'physicss',
        'plantpat',
        'politica',
        'psycholo',
        'pubhealt',
        'publicre',
        'rehbsci2',
        'religion',
        'soccrimi',
        'socsocio',
        'soilwatr',
        'spaporpo',
        'spaporsp',
        'speechlh',
        'statisti',
        'telecomm',
        'theadanc',
        'tourismr',
        'urbanreg',
        'veterina',
        'wildlife',
        'womenstu',
        'writprog',
        'writtenc'
    ];

c.queue(socPageSlugs.map(function(slug) {
    return SOC_BASE_URL + slug + '.htm';
}));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment