Created
February 21, 2012 21:20
-
-
Save yosemitebandit/1879027 to your computer and use it in GitHub Desktop.
Format historical weather data exported from the NCDC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
''' | |
ncdc_processor.py | |
imports a csv of historical weather data from the NCDC | |
exports a somewhat more sanely formatted csv suitable for graphing | |
data from http://www7.ncdc.noaa.gov/CDO/cdo | |
usage: | |
$ python ncdc_processor.py /path/to/ncdc_data.txt /path/to/export.csv | |
''' | |
import csv | |
import sys | |
import time | |
class NCDC():
    '''
    Reformat a csv of historical weather data exported from the NCDC.

    Rows are read from the source file, relabelled with self.labels,
    cleaned up and written to the export file.  Both files are opened by
    the constructor and closed once process() has finished.
    '''

    # value of the first column on the header line(s) of the raw export
    _RAW_HEADER_MARKER = 'STN---'
    # letters that may trail a precipitation value
    _PRECIP_FLAGS = tuple('ABCDEFGHI')

    def __init__(self, source_path=None, export_path=None):
        '''
        Open the source and export files and write the new header row.

        source_path -- path of the raw NCDC export; defaults to sys.argv[1]
        export_path -- path of the csv to create; defaults to sys.argv[2]

        Raises IndexError when a path is omitted and the matching
        command-line argument is missing as well.
        '''
        if source_path is None:
            source_path = sys.argv[1]
        if export_path is None:
            export_path = sys.argv[2]

        # more easily understandable headers
        self.labels = ['Station', 'WBAN', 'date', 'temp (F)', 'temp count'
            , 'dewpoint', 'dwp count', 'sea level pressure', 'slp count'
            , 'station pressure', 'stp count', 'visibility (miles)'
            , 'vis count', 'windspeed (knots)', 'windspeed count'
            , 'max windspeed (knots)', 'gust (knots)', 'max temp (F)'
            , 'min temp (F)', 'precipitation (in)', 'snow_depth (in)'
            , 'events']

        self._source = self._open_csv(source_path, 'r')
        try:
            self._export = self._open_csv(export_path, 'w')
        except Exception:
            # do not leak the input handle if the output cannot be opened
            self._source.close()
            raise

        self.reader = csv.DictReader(self._source, fieldnames=self.labels)
        # setting extrasaction to prevent missing fieldname error
        # ..though there shouldn't be any missing fields
        self.writer = csv.DictWriter(self._export, fieldnames=self.labels
            , extrasaction='ignore')
        self.writer.writeheader()

    @staticmethod
    def _open_csv(path, mode):
        ''' open path the way the csv module expects on this interpreter
        '''
        if sys.version_info[0] < 3:
            # python 2: the csv module wants binary file objects
            return open(path, mode + 'b')
        # python 3: the csv module wants text files with newline=''
        return open(path, mode, newline='')

    def close(self):
        ''' close both files; safe to call more than once
        '''
        self._source.close()
        self._export.close()

    def process(self):
        '''
        Clean every data row of the source file and write it to the export.

        Header rows of the original file are skipped.  Both files are
        closed afterwards, even if a row fails, so the export is flushed
        to disk; calling process() again is then a no-op.

        Raises ValueError when a row's date is not in YYYYMMDD form.
        '''
        if self._source.closed:
            # everything has already been processed
            return
        try:
            for row in self.reader:
                # skip the headers from the original
                station = (row.get('Station') or '').strip()
                if station == self._RAW_HEADER_MARKER:
                    continue
                self.writer.writerow(self._clean(row))
        finally:
            self.close()

    def _clean(self, row):
        ''' return a cleaned copy of row holding only the known labels
        '''
        # remove whitespace from all values; a column missing from a
        # short row is read as None and treated as empty
        cleaned = dict((label, (row.get(label) or '').strip())
            for label in self.labels)

        # an asterisk denotes the origin of derived max / min temp
        # (endswith is safe on empty values, unlike indexing with [-1])
        for temp_label in ('max temp (F)', 'min temp (F)'):
            if cleaned[temp_label].endswith('*'):
                cleaned[temp_label] = cleaned[temp_label][:-1]

        # letters in the precip data denotes the length of a storm
        precip_label = 'precipitation (in)'
        if cleaned[precip_label].endswith(self._PRECIP_FLAGS):
            cleaned[precip_label] = cleaned[precip_label][:-1]

        # format the date
        date_label = 'date'
        cleaned[date_label] = time.strftime('%Y-%m-%d'
            , time.strptime(cleaned[date_label], '%Y%m%d'))
        return cleaned
if __name__ == '__main__':
    # both paths are required; exit with a usage hint instead of an
    # IndexError traceback when one of them is missing
    if len(sys.argv) < 3:
        sys.exit('usage: python ncdc_processor.py'
            ' /path/to/ncdc_data.txt /path/to/export.csv')
    ncdc = NCDC()
    ncdc.process()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment