Skip to content

Instantly share code, notes, and snippets.

@yosemitebandit
Created February 21, 2012 21:20
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yosemitebandit/1879027 to your computer and use it in GitHub Desktop.
Save yosemitebandit/1879027 to your computer and use it in GitHub Desktop.
Format historical weather data exported from the NCDC
#!/usr/bin/env python
'''
ncdc_processor.py
imports a csv of historical weather data from the NCDC
exports a somewhat more sanely formatted csv suitable for graphing
data from http://www7.ncdc.noaa.gov/CDO/cdo
usage:
$ python ncdc_processor.py /path/to/ncdc_data.txt /path/to/export.csv
'''
import csv
import sys
import time
class NCDC(object):
    '''
    Reformat a csv of historical weather data exported from the NCDC.

    The input rows are read with a fixed list of friendlier column labels,
    cleaned up (whitespace, trailing flag characters, date format) and
    written to a new csv whose first line is those labels.

    By default the input path is sys.argv[1] and the output path is
    sys.argv[2]; both may instead be passed explicitly.
    '''

    # trailing letters that may be attached to a precipitation reading
    PRECIP_FLAGS = tuple('ABCDEFGHI')

    def __init__(self, in_path=None, out_path=None):
        '''
        Open the input and output files and write the output header row.

        in_path -- csv file to read; defaults to sys.argv[1]
        out_path -- csv file to (over)write; defaults to sys.argv[2]

        Raises IndexError if a path is omitted and the matching command
        line argument is missing, and IOError/OSError if a file cannot
        be opened.
        '''
        if in_path is None:
            in_path = sys.argv[1]
        if out_path is None:
            out_path = sys.argv[2]

        # more easily understandable headers
        self.labels = [
            'Station', 'WBAN', 'date', 'temp (F)', 'temp count',
            'dewpoint', 'dwp count', 'sea level pressure', 'slp count',
            'station pressure', 'stp count', 'visibility (miles)',
            'vis count', 'windspeed (knots)', 'windspeed count',
            'max windspeed (knots)', 'gust (knots)', 'max temp (F)',
            'min temp (F)', 'precipitation (in)', 'snow_depth (in)',
            'events',
        ]

        self._infile = self._open_csv(in_path, 'r')
        try:
            self._outfile = self._open_csv(out_path, 'w')
        except (IOError, OSError):
            # don't leak the input handle if the output can't be opened
            self._infile.close()
            raise

        self.reader = csv.DictReader(self._infile, fieldnames=self.labels)

        # setting extrasaction to prevent missing fieldname error
        # ..though there shouldn't be any missing fields
        self.writer = csv.DictWriter(
            self._outfile, fieldnames=self.labels, extrasaction='ignore')
        self.writer.writeheader()

    @staticmethod
    def _open_csv(path, mode):
        '''Open path in the mode the csv module expects on this Python.'''
        if sys.version_info[0] >= 3:
            # Python 3: text mode, with newline handling left to csv
            return open(path, mode, newline='')
        # Python 2: csv reads and writes binary-mode files
        return open(path, mode + 'b')

    def close(self):
        '''Close the input and output files; safe to call repeatedly.'''
        try:
            self._infile.close()
        finally:
            self._outfile.close()

    def process(self):
        '''
        Clean every data row of the input and write it to the output.

        Rows whose Station column is 'STN---' (the headers from the
        original file) are skipped.  Both files are closed when this
        returns, including when a row fails to convert.

        Raises ValueError if a date is not in YYYYMMDD form.
        '''
        try:
            for row in self.reader:
                # skip the headers from the original
                if (row['Station'] or '').strip() == 'STN---':
                    continue

                for label in self.labels:
                    # remove whitespace from all values; a column missing
                    # from a short row comes back as None, treat as empty
                    row[label] = (row[label] or '').strip()

                # an asterisk denotes the origin of derived max/min temp;
                # endswith() is used so an empty field is left alone
                for label in ('max temp (F)', 'min temp (F)'):
                    if row[label].endswith('*'):
                        row[label] = row[label][:-1]

                # letters in the precip data denote the length of a storm
                precip_label = 'precipitation (in)'
                if row[precip_label].endswith(self.PRECIP_FLAGS):
                    row[precip_label] = row[precip_label][:-1]

                # format the date: YYYYMMDD -> YYYY-MM-DD
                date_label = 'date'
                parsed = time.strptime(row[date_label], '%Y%m%d')
                row[date_label] = time.strftime('%Y-%m-%d', parsed)

                self.writer.writerow(row)
        finally:
            # flush the output and release both handles
            self.close()
if __name__ == '__main__':
    # run the conversion using the paths given on the command line
    NCDC().process()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment