Skip to content

Instantly share code, notes, and snippets.

@onyxfish
Created July 1, 2010 19:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save onyxfish/460427 to your computer and use it in GitHub Desktop.
Save onyxfish/460427 to your computer and use it in GitHub Desktop.
Fetch NOAA weather stations for Illinois (IL) from the web and save them to a CSV file.
#!/bin/env python
import csv
import re
from urllib2 import urlopen
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
# Base URL of the NOAA site. Station links scraped from the index page are
# site-relative (they start with '/'), so they are joined onto this root.
NOAA_ROOT_URL = 'http://www.weather.gov/'

# Fetch the Illinois station index page and parse it as HTML. The response
# is closed explicitly instead of being left to the garbage collector.
list_page = urlopen(NOAA_ROOT_URL + 'xml/current_obs/seek.php?state=il')
try:
    list_page_soup = BeautifulSoup(list_page.read())
finally:
    list_page.close()

# Keep only anchors whose href points at a per-station observation feed,
# i.e. starts with /data/current_obs/ and ends with .xml. The pattern is a
# raw string so that '\.' reaches the regex engine unchanged.
station_links = list_page_soup.findAll(
    'a', href=re.compile(r'^/data/current_obs/.*\.xml$'))
station_urls = [l['href'] for l in station_links]
# Remove duplicates; sorting makes the fetch order (and therefore the row
# order of the CSV) reproducible from run to run.
station_urls = sorted(set(station_urls))

# One (station_id, name, 'latitude longitude', source) tuple per station.
station_data = []
for url in station_urls:
    # Strip the root's trailing slash so that joining it with the leading
    # slash of the href does not produce 'http://www.weather.gov//data/...'.
    observation_xml = urlopen(NOAA_ROOT_URL.rstrip('/') + url)
    try:
        observation_soup = BeautifulStoneSoup(observation_xml.read())
    finally:
        observation_xml.close()

    # Every field read below is a child of the <current_observation> element.
    station_id = observation_soup.current_observation.station_id.string
    name = observation_soup.current_observation.location.string
    latitude = observation_soup.current_observation.latitude.string
    longitude = observation_soup.current_observation.longitude.string

    station_data.append(
        (station_id, name, '%s %s' % (latitude, longitude), 'NOAA'))
    # Single-argument print() is valid on Python 2 and 3 alike; the two
    # spaces reproduce the output of the original "print 'Got ', station_id".
    print('Got  %s' % station_id)

# Write a header row followed by one row per station. "wb" is the mode the
# Python 2 csv module expects; the with-block guarantees the file is
# flushed and closed even if writing fails part-way.
with open("noaa_station_data.csv", "wb") as csv_file:
    output = csv.writer(csv_file)
    output.writerow(('station_id', 'name', 'location', 'source'))
    output.writerows(station_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment