Skip to content

Instantly share code, notes, and snippets.

@nattaylor
Created January 10, 2024 02:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nattaylor/1dd5af8a6cd4061fcfc7a3edb4525266 to your computer and use it in GitHub Desktop.
Save nattaylor/1dd5af8a6cd4061fcfc7a3edb4525266 to your computer and use it in GitHub Desktop.
NDBC historical stdmet buoy data loader
"""Work with NDBC historical data"""
import io
import csv
import datetime
import gzip
import requests
import re
from typing import Iterable, Optional
class NdbcStdmet:
    """Load NDBC historical standard meteorological (stdmet) observations.

    See: https://www.ndbc.noaa.gov/historical_data.shtml#stdmet

    Each observation is returned as a tuple
    ``(timestamp, station, *values)`` where ``values`` follow the order of
    ``HEADERS`` and any value listed in ``MISSING`` is replaced by ``None``.
    The column immediately after the timestamp fields is not returned
    (``HEADERS`` starts at the column that follows it).

    Usage:
        >>> ndbc = NdbcStdmet('44013', 2020)
        >>> print(list(ndbc.load())[:3])
        [(datetime.datetime(2019, 12, 31, 23, 50), '44013', 5.4, 7.0, ...), ...]
    """

    URL = 'https://www.ndbc.noaa.gov/data/historical/stdmet/{station}h{year}.txt.gz'
    # Raw cell texts that are treated as "no observation".
    # NOTE(review): a pressure sentinel such as '9999.0' is not listed here;
    # confirm against the NDBC format description whether it should be.
    MISSING = ('99.00', '999', '999.9', '99.0', '999.0', '')
    # Expected value columns, in order, after the timestamp and the one
    # skipped column.
    HEADERS = ['WSPD', 'GST', 'WVHT', 'DPD', 'APD', 'MWD', 'PRES', 'ATMP', 'WTMP', 'DEWP', 'VIS', 'TIDE']
    # Alternate column labels mapped onto the names used in HEADERS.
    # Presumably older files label the pressure column BAR -- confirm
    # against real pre-2007 files.
    ALIASES = {'BAR': 'PRES'}
    # Seconds to wait for the server before giving up on the download.
    TIMEOUT = 60
    station = None
    year = None

    def __init__(self, station, year):
        """Remember which station and year to fetch.

        Both values are substituted verbatim into ``URL``; ``station`` is
        also echoed back unchanged in every returned observation.
        """
        self.station = station
        self.year = year

    def load(self) -> Iterable[tuple]:
        """Download the station/year file and return its observations.

        Returns:
            A lazy iterable of ``(timestamp, station, *values)`` tuples.

        Raises:
            requests.RequestException: the download failed, timed out, or
                the server answered with an error status.
            ValueError: the header row does not describe the columns in
                ``HEADERS``.
        """
        url = self.URL.format(station=self.station, year=self.year)
        response = requests.get(url, timeout=self.TIMEOUT)
        # Fail on 404/5xx here instead of handing an HTML error page to gzip.
        response.raise_for_status()
        lines = io.TextIOWrapper(
            gzip.GzipFile(fileobj=io.BytesIO(response.content)),
            encoding='utf8',
        )
        return self._parse(lines)

    def _parse(self, lines: Iterable[str]) -> Iterable[tuple]:
        """Validate the header of *lines* and return a lazy row iterator.

        The header is read and checked immediately; data rows are converted
        only as the returned iterable is consumed.

        Raises:
            ValueError: the header (after alias mapping and the optional
                TIDE column) does not match ``HEADERS``, including when the
                input is empty.
        """
        reader = csv.reader(lines, delimiter=' ', skipinitialspace=True)
        # Trailing whitespace on the header line yields empty cells; drop
        # them so the last real column name is what gets inspected below.
        header = [cell for cell in next(reader, []) if cell]
        first = header[0] if header else ''

        # Decided before the '#' is stripped: a bare 'YY' header gets a '19'
        # century prefix, a '#YY' header does not (presumably because those
        # files already carry four-digit years).
        year_prefix = '19' if first == 'YY' else ''

        # '#'-prefixed headers are followed by a second header row; skip it.
        if first.startswith('#'):
            header[0] = first[1:]
            next(reader, None)

        # Timestamp fields end at 'mm' when minutes are present, else at
        # the fourth column.
        dt_last_col = header.index('mm') + 1 if 'mm' in header else 4

        # TIDE is often absent; pad both the header and every row.
        padding = []
        if header[-1:] != ['TIDE']:
            header.append('TIDE')
            padding = ['']

        columns = [self.ALIASES.get(name, name) for name in header[dt_last_col + 1:]]
        if columns != self.HEADERS:
            raise ValueError(f'Corrupt header. Missing: {set(self.HEADERS) - set(columns)}')

        # any(row) skips blank and whitespace-only lines, which would
        # otherwise fail on row[0].
        return (
            self._convert(row, year_prefix, dt_last_col, padding)
            for row in reader
            if any(row)
        )

    def _convert(self, row: list, year_prefix: str, dt_last_col: int, padding: list) -> tuple:
        """Turn one raw data row into ``(timestamp, station, *values)``."""
        # A trailing space on the line shows up as an empty last cell.
        if row[-1] == '':
            row = row[:-1]
        stamp = datetime.datetime(
            int(year_prefix + row[0]),
            *(int(cell) for cell in row[1:dt_last_col]),
        )
        # Index dt_last_col itself is skipped on purpose: HEADERS begins
        # one column later.
        values = row[dt_last_col + 1:] + padding
        return (stamp, self.station, *(self._to_float(cell) for cell in values))

    def _to_float(self, text: str) -> Optional[float]:
        """Return ``None`` for a ``MISSING`` marker, else ``float(text)``."""
        return None if text in self.MISSING else float(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment