Last active
August 22, 2021 21:41
-
-
Save rhysallister/f759b7389695e2480c03d84656df6abb to your computer and use it in GitHub Desktop.
Pull down all forecast data from the NHC (National Hurricane Center) website as a JSON dump
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pulls down National Hurricane Center (NHC) forecast data.
#
# Data are located in zipfiles. Each zipfile contains one or more shapefiles.
# A shapefile is really a collection of files; I'm interested in the .dbf file as
# this contains the tabular data as well as columns with geographic information.
# If the .dbf file did not have the location data in the table I would have to use
# a library like shapely or fiona to get the geographic data.
#
import os
import re
import requests
import zipfile
from dbfread import DBF
from io import BytesIO
from json import dumps

url = 'http://www.nhc.noaa.gov/gis/forecast/archive/'

# Match quoted hrefs for Atlantic ("al...") and East Pacific ("ep...") basin
# zipfiles. NOTE(fix): the original pattern '"[al|ep].+zip"' used a character
# class, which matches any ONE character of {a, l, |, e, p} -- not the intended
# "al"-or-"ep" prefix. A non-capturing group restores the intent, and [^"]+
# keeps the match from running past the closing quote. Compiled once, since it
# is applied to every line of the listing.
zip_pattern = re.compile(r'"(?:al|ep)[^"]+\.zip"')

data = requests.get(url)
with open('data.json', 'w') as outfile:
    # decode_unicode=True: iter_lines() yields bytes by default on Python 3,
    # which would not match the str regex pattern.
    for line in data.iter_lines(decode_unicode=True):
        zipline = zip_pattern.search(line)
        if not zipline:
            continue
        zipurl = zipline.group().strip('"')
        if 'latest' in zipurl:
            continue  # skip the rolling "latest" duplicates of archived files
        print(zipurl)
        zipreq = requests.get(url + zipurl)
        with zipfile.ZipFile(BytesIO(zipreq.content)) as zipp:
            # Only the "*pts.dbf" member holds the point table we want.
            item = [info.filename for info in zipp.infolist()
                    if info.filename.endswith('pts.dbf')]
            if len(item) == 0:
                continue
            zipp.extract(item[0])
            # Emit one JSON object per .dbf row, tagged with its source
            # zipfile name (extension stripped) for traceability.
            for row in DBF(item[0]):
                row['nhc_filename'] = zipurl[:-4]
                outfile.write(dumps(row) + '\n')
            os.remove(item[0])  # extracted .dbf is only needed transiently
            print(item)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment