Skip to content

Instantly share code, notes, and snippets.

@howdoicomputer
Created April 25, 2015 22:09
Show Gist options
  • Save howdoicomputer/c8e1eb2bb1257ae01ea0 to your computer and use it in GitHub Desktop.
Save howdoicomputer/c8e1eb2bb1257ae01ea0 to your computer and use it in GitHub Desktop.
/* exported Station */
var _ = require('lodash'),
config = require('config'),
request = require('request'),
cheerio = require('cheerio');
/**
* This module contains functions for scraping and storing data on
* California Data Exchange Center stations.
*
* @module Station
*/
var Station = module.exports = {
  /**
   * Fetch station metadata.
   *
   * Requests the station page for `id`, reads every `td` of the first table
   * under `#main_content`, and pairs the cells up in document order: each two
   * consecutive cells become one single-key object
   * `{ <first cell text>: <second cell text> }`.
   *
   * @param {String} id - station ID, appended to the configured station URL
   * @param {Function} callback - called as `callback(objects, error)`.
   *   `objects` is always an array (empty when the request failed); `error`
   *   is only supplied when the request itself failed.
   */
  fetchStation: function(id, callback) {
    var cdec = config.get('CDEC');
    var url = cdec.endpoints.base + cdec.endpoints.station + id;

    request(url, function(error, response, html) {
      if (error) {
        // There is no body to parse; report the failure instead of handing
        // an undefined document to cheerio.
        callback([], error);
        return;
      }

      var $ = cheerio.load(html);
      var table = $('#main_content table').first();

      // Flatten the table into [ key1, value1, key2, value2, ... ]
      var cells = [];
      table.find('tr').has('td').each(function() {
        $(this).find('td').each(function(index, content) {
          cells.push($(content).text());
        });
      });

      // Walk the flat list two cells at a time. Indexing (rather than
      // splicing the list while looping over its length) guarantees that
      // every pair is visited. A trailing unpaired cell becomes a key whose
      // value is undefined.
      var objects = [];
      for (var i = 0; i < cells.length; i += 2) {
        var kv = {};
        kv[cells[i]] = cells[i + 1];
        objects.push(kv);
      }

      callback(objects);
    });
  },

  /**
   * Goes through a hydro search area and collects station IDs.
   *
   * @param {String} hydroArea - appended to the configured hydro search URL
   * @param {Function} [callback] - optional; receives the array of station
   *   IDs as its first argument, plus a second argument when
   *   fetchStationIDList supplies one. When omitted, the IDs are written to
   *   the console with console.log.
   */
  fetchHydroAreaIDList: function(hydroArea, callback) {
    var endpoints = config.get('CDEC').endpoints;
    var url = endpoints.base + endpoints.hydroUrl + hydroArea;

    fetchStationIDList(url, function(stationIDs, error) {
      if (typeof callback === 'function') {
        callback(stationIDs, error);
        return;
      }
      console.log(stationIDs);
    });
  }
};
/**
 * Fetch a list of station IDs.
 *
 * Requests `url` and collects the trimmed text of every `td` under
 * `#main_content table` that consists of exactly three uppercase ASCII
 * letters.
 *
 * @param {String} url - page to request
 * @param {Function} callback - called as `callback(ids, error)`. `ids` is
 *   always an array (empty on failure); `error` is only supplied when the
 *   request failed or the response status was not 200.
 */
function fetchStationIDList(url, callback) {
  request(url, function(error, response, html) {
    if (error) {
      callback([], error);
      return;
    }

    // The HTTP status lives on `statusCode`; checking a non-existent `code`
    // property would make this branch reject every response.
    if (!response || response.statusCode !== 200) {
      callback([], new Error(
        'Unexpected response status ' +
        (response && response.statusCode) +
        ' for ' + url
      ));
      return;
    }

    var $ = cheerio.load(html);
    var idPattern = /^[A-Z]{3}$/;
    var ids = [];

    $('#main_content table td').each(function(index, cell) {
      var text = _.trim($(cell).text());
      if (idPattern.test(text)) {
        ids.push(text);
      }
    });

    callback(ids);
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment