Skip to content

Instantly share code, notes, and snippets.

@jashkenas
Last active February 24, 2024 14:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jashkenas/8978950 to your computer and use it in GitHub Desktop.
Save jashkenas/8978950 to your computer and use it in GitHub Desktop.
// When true, ScrapeWeather skips any report whose timestamp is more than
// three hours before the current time.
var removeOldReports = true;
// When true, ScrapeWeather skips any report whose source column is
// 'SOCIAL MEDIA'.
var removeSocialMedia = true;
var request = require('request');
var _ = require('underscore');
var xml2js = require('xml2js').parseString;
var fs = require('fs');
var moment = require('moment');
/**
 * Downloads the NOAA storm-total KML index, follows the first
 * `*.point.snow.kml` link found in it, parses that KML file and hands the
 * extracted placemarks to `cb`.
 *
 * Placemarks that lack a "Location: " entry in their description, or that
 * lack LookAt longitude/latitude values, are skipped instead of aborting
 * the whole scrape.
 *
 * @param {function(Array<Object>)} cb - Invoked once with an array of
 *   `{place, state, longitude, latitude}` records. `place` is trimmed and
 *   cut to at most 20 characters; `state` is whatever follows the first
 *   ", " in the location text (undefined when there is none).
 * @throws {Error} Thrown from inside the request/parse callbacks when a
 *   download fails, the XML cannot be parsed, or the index contains no
 *   point-snow KML link.
 */
function ScrapeKML(cb) {
  var url = "http://www.erh.noaa.gov/hydromet/eventdata/stormTotalv3_12/stormTotalv3_12.kml";
  request(url, function(err, resp, body) {
    if (err) throw err;
    var link = typeof body === 'string'
      ? body.match(/<href>(\S+?\.point\.snow\.kml)<\/href>/)
      : null;
    // Fail with a readable message rather than a TypeError on `null[1]`.
    if (!link) throw new Error("No point snow KML link found in " + url);
    request(link[1], function(err, resp, body) {
      if (err) throw err;
      xml2js(body, function(err, result) {
        if (err) throw err;
        var doc = result && result.kml && result.kml.Document && result.kml.Document[0];
        var placemarks = (doc && doc.Placemark) || [];
        var places = [];
        for (var i = 0; i < placemarks.length; i++) {
          var item = placemarks[i];
          var description = item.description && item.description[0];
          var found = typeof description === 'string'
            ? description.match(/Location: <\/b>(.*?)<br>/)
            : null;
          var lookAt = item.LookAt && item.LookAt[0];
          // One malformed placemark must not discard every other one.
          if (!found || !lookAt || !lookAt.longitude || !lookAt.latitude) continue;
          var loc = found[1].split(', ');
          places.push({
            place: loc[0].trim().slice(0, 20),
            state: loc[1],
            longitude: lookAt.longitude[0],
            latitude: lookAt.latitude[0]
          });
        }
        cb(places);
      });
    });
  });
}
/**
 * Scrapes storm-total snowfall reports from the NWS text products
 * (product=PNS) issued by OKX and PHI, attaches coordinates obtained from
 * ScrapeKML, and writes the combined records to `snow.json`.
 *
 * Each record has the shape
 * `{snowfall, place, county, state, timestamp, source}` plus
 * `longitude`/`latitude` when a KML placemark with the same state and
 * place name exists; otherwise a warning is printed for that record.
 *
 * Honors the module-level `removeSocialMedia` and `removeOldReports` flags.
 *
 * @throws {Error} Thrown from inside the callbacks when a download or the
 *   final file write fails.
 */
function ScrapeWeather() {
  var urls = [
    "http://forecast.weather.gov/product.php?site=NWS&issuedby=OKX&product=PNS&format=txt&version=1&glossary=0",
    "http://forecast.weather.gov/product.php?site=NWS&issuedby=PHI&product=PNS&format=txt&version=1&glossary=0"
  ];
  var header = "********************STORM TOTAL SNOWFALL********************";
  var footer1 = "********************";
  var footer2 = "$$";
  var abbrevs = {
    'CONNECTICUT': 'CT',
    'NEW JERSEY': 'NJ',
    'NEW YORK': 'NY',
    'DELAWARE': 'DE',
    'MARYLAND': 'MD',
    'PENNSYLVANIA': 'PA'
  };
  var states = _.keys(abbrevs);
  var results = [];
  var DAY_MS = 24 * 60 * 60 * 1000;

  // Builds a Date from a report's time column (e.g. "930 AM") and its
  // month/day column. The reports carry no year, so the current year is
  // assumed; a result more than a day in the future is taken to belong to
  // the previous year (a late-December report read in early January).
  var parseTimestamp = function(time, date) {
    var clock = time.slice(0, -5) + ":" + time.slice(-5);
    var now = new Date();
    var year = now.getFullYear();
    var parsed = new Date(Date.parse(clock + ' ' + date + '/' + year));
    if (parsed.getTime() - now.getTime() > DAY_MS) {
      parsed = new Date(Date.parse(clock + ' ' + date + '/' + (year - 1)));
    }
    return parsed;
  };

  // Extracts the snowfall rows from one product's text and appends them to
  // `results`. A product without the snowfall header contributes nothing.
  var parseReports = function(body, url) {
    var sections = String(body).split(header);
    if (sections.length < 2) {
      console.warn("No storm total snowfall section found at: " + url);
      return;
    }
    var text = sections[1].split(footer1)[0].split(footer2)[0];
    // The first three non-empty lines after the header are not data rows.
    var lines = _.compact(text.split(/\n/)).slice(3);
    var currentState, currentCounty;
    for (var i = 0; i < lines.length; i++) {
      var line = lines[i];
      if (_.contains(states, line)) {
        currentState = line;
        continue;
      }
      if (/^\.\.\./.test(line)) {
        var county = line.match(/\.\.\.(.+) COUNTY/);
        // Headings without the word COUNTY fall back to the heading text
        // with its surrounding dots removed.
        currentCounty = county ? county[1] : line.replace(/^\.+|\.+$/g, '');
        continue;
      }
      // Columns: place, snowfall, time, date, source.
      var tuple = _.compact(line.split(/\s{2,}/));
      // Whitespace-only or truncated rows have no time/date to parse.
      if (tuple.length < 4) continue;
      if (removeSocialMedia && tuple[4] === 'SOCIAL MEDIA') continue;
      var timestamp = parseTimestamp(tuple[2], tuple[3]);
      if (removeOldReports && moment().isAfter(moment(timestamp).add(3, 'hours'))) continue;
      results.push({
        snowfall: tuple[1],
        place: tuple[0],
        county: currentCounty,
        state: abbrevs[currentState],
        timestamp: timestamp.toString(),
        source: tuple[4]
      });
    }
  };

  // Fetches one URL, parses it, then moves on to the next queued URL;
  // calls `cb(results)` once the queue is empty.
  var ScrapeNWS = function(url, cb) {
    request(url, function(err, resp, body) {
      if (err) throw err;
      parseReports(body, url);
      if (urls.length) {
        ScrapeNWS(urls.shift(), cb);
      } else {
        cb(results);
      }
    });
  };

  ScrapeNWS(urls.shift(), function(results) {
    ScrapeKML(function(places) {
      for (var i = 0; i < results.length; i++) {
        var result = results[i];
        var location = _.find(places, function(place) {
          return place.state === result.state && place.place === result.place;
        });
        if (location) {
          result.longitude = location.longitude;
          result.latitude = location.latitude;
        } else {
          console.warn("Could not find coordinates for: " + result.place + ' ' + result.state);
        }
      }
      // The callback is required by current Node versions and surfaces
      // write failures instead of dropping them.
      fs.writeFile('snow.json', JSON.stringify(results, null, 2), function(err) {
        if (err) throw err;
      });
    });
  });
}
// Start the scrape as soon as this script is loaded.
ScrapeWeather();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment