@rhysallister
Last active August 22, 2021 21:41
Pull down all data from the NHC website as a JSON dump
# Pulls down National Hurricane Center (NHC) forecast archive data.
#
# The data are published as zip files, and each zip file contains one or more
# shapefiles. A shapefile is really a collection of files; I'm interested in
# the .dbf file, as it contains the tabular data as well as columns with the
# geographic information. If the .dbf file did not carry the location data in
# the table, I would have to use a library like shapely or fiona to get the
# geometry (a sketch of that approach follows the script below).
#
import os
import re
import zipfile
from io import BytesIO
from json import dumps

import requests
from dbfread import DBF

url = 'http://www.nhc.noaa.gov/gis/forecast/archive/'
data = requests.get(url)

with open('data.json', 'w') as outfile:
    for line in data.text.splitlines():
        # Match Atlantic ('al') and eastern Pacific ('ep') archive links.
        # Note: '[al|ep]' is a character class; '(al|ep)' is the intent.
        zipline = re.search(r'"(al|ep).+?\.zip"', line)
        if zipline and 'latest' not in zipline.group():
            zipurl = zipline.group().strip('"')
            print(zipurl)
            zipreq = requests.get(url + zipurl)
            with zipfile.ZipFile(BytesIO(zipreq.content)) as zipp:
                # Keep only the forecast-points table (*pts.dbf).
                items = [info.filename for info in zipp.infolist()
                         if info.filename.endswith('pts.dbf')]
                if not items:
                    continue
                zipp.extract(items[0])
                for row in DBF(items[0]):
                    # Tag each record with the zip it came from (minus '.zip').
                    row['nhc_filename'] = zipurl[:-4]
                    outfile.write(dumps(row) + '\n')
                os.remove(items[0])
                print(items)
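The script writes newline-delimited JSON, one record per line, so the dump can be read back without loading a single giant document. A minimal sketch of consuming data.json:

import json

with open('data.json') as infile:
    rows = [json.loads(line) for line in infile]
print(len(rows), 'records loaded')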
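As the header comment notes, if the .dbf table lacked the location columns, a library like fiona could read the geometry from the shapefile itself. A minimal sketch, assuming a point-geometry shapefile; the filename here is hypothetical:

import fiona  # reads the .shp/.shx/.dbf bundle together

with fiona.open('al012021_5day_001pts.shp') as src:  # hypothetical filename
    for feature in src:
        props = dict(feature['properties'])  # the tabular attributes
        geom = feature['geometry']           # GeoJSON-like mapping
        if geom['type'] == 'Point':
            props['lon'], props['lat'] = geom['coordinates']
        print(props)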