Skip to content

Instantly share code, notes, and snippets.

@noname01
Last active August 29, 2015 14:06
Show Gist options
  • Save noname01/de514aaff96b90e69ee5 to your computer and use it in GitHub Desktop.
Save noname01/de514aaff96b90e69ee5 to your computer and use it in GitHub Desktop.
Node.js script to scrape the UW time schedule homepage
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
// Page to scrape: the AUT2014 time schedule index on washington.edu.
var url = "http://www.washington.edu/students/timeschd/AUT2014/";

// Matches link text containing a parenthesised part: $1 is the text before
// the parentheses, $2 is the text inside them (presumably "Name (ABBR)").
var LABEL_PATTERN = /\s*(.*)\s*\((.*)\)/;

/**
 * Splits a link label into the entry's display name and its catalog key.
 *
 * @param {string} label - Text content of a scraped link.
 * @returns {{fullname: string, abbr: string}} `fullname` is the trimmed text
 *   before the parentheses; `abbr` is the parenthesised text, lower-cased and
 *   with ALL whitespace removed so it is usable as a compact key.
 */
function parseLabel(label) {
  return {
    fullname: label.replace(LABEL_PATTERN, "$1").trim(),
    // The `g` flag matters: without it only the first whitespace run is
    // stripped, leaving spaces/newlines inside multi-word abbreviations.
    abbr: label.replace(LABEL_PATTERN, "$2").replace(/\s+/g, "").toLowerCase()
  };
}

// Fetch the index page, collect every qualifying ".html" link into a map keyed
// by abbreviation, and write the map to catalog.json in the working directory.
request(url, function (error, response, html) {
  if (error) {
    console.error("Request for " + url + " failed:", error);
    process.exitCode = 1;
    return;
  }
  if (response.statusCode !== 200) {
    console.error("Request for " + url + " returned HTTP " + response.statusCode);
    process.exitCode = 1;
    return;
  }

  var $ = cheerio.load(html);
  var data = {};

  $('body>ul li a[href$=".html"]').each(function (i, u) {
    var link = $(u);

    // Skip links whose parent element's text contains "see" (case-insensitive)
    // -- presumably cross-reference entries pointing at another listing.
    if (link.parent().text().toLowerCase().indexOf("see") >= 0) {
      return; // returning undefined continues the .each() iteration
    }

    var parsed = parseLabel(link.text());
    data[parsed.abbr] = {fullname: parsed.fullname, page: link.attr("href")};
  });

  // A callback is mandatory: current Node.js throws a TypeError without one,
  // and it is the only place a failed write can be reported.
  fs.writeFile("catalog.json", JSON.stringify(data, null, " "), function (writeError) {
    if (writeError) {
      console.error("Could not write catalog.json:", writeError);
      process.exitCode = 1;
    }
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment