Last active
August 22, 2021 21:41
-
-
Save rhysallister/f759b7389695e2480c03d84656df6abb to your computer and use it in GitHub Desktop.
Pull down all forecast data from the NHC (National Hurricane Center) website as a JSON dump
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pulls down National Hurricane Center (NHC) forecast data.
#
# Data are located in zipfiles. Each zipfile contains one or more shapefiles.
# A shapefile is really a collection of files; I'm interested in the .dbf file as
# this contains the tabular data as well as columns with geographic information.
# If the .dbf file did not have the location data in the table I would have to use
# a library like shapely or fiona to get the geographic data.
#
import os
import re
import requests
import zipfile
from dbfread import DBF
from io import BytesIO
from json import dumps

url = 'http://www.nhc.noaa.gov/gis/forecast/archive/'

# Match quoted hrefs for Atlantic ("al...") and East Pacific ("ep...") basin
# zipfiles. NOTE(fix): the original pattern '"[al|ep].+zip"' used a character
# class, which matches any ONE character of {a, l, |, e, p} -- not the intended
# "al"-or-"ep" prefix. A non-capturing group restores the intent, and [^"]+
# keeps the match from running past the closing quote. Compiled once, since it
# is applied to every line of the listing.
zip_pattern = re.compile(r'"(?:al|ep)[^"]+\.zip"')

data = requests.get(url)
with open('data.json', 'w') as outfile:
    # decode_unicode=True: iter_lines() yields bytes by default on Python 3,
    # which would not match the str regex pattern.
    for line in data.iter_lines(decode_unicode=True):
        zipline = zip_pattern.search(line)
        if not zipline:
            continue
        zipurl = zipline.group().strip('"')
        if 'latest' in zipurl:
            continue  # skip the rolling "latest" duplicates of archived files
        print(zipurl)
        zipreq = requests.get(url + zipurl)
        with zipfile.ZipFile(BytesIO(zipreq.content)) as zipp:
            # Only the "*pts.dbf" member holds the point table we want.
            item = [info.filename for info in zipp.infolist()
                    if info.filename.endswith('pts.dbf')]
            if len(item) == 0:
                continue
            zipp.extract(item[0])
            # Emit one JSON object per .dbf row, tagged with its source
            # zipfile name (extension stripped) for traceability.
            for row in DBF(item[0]):
                row['nhc_filename'] = zipurl[:-4]
                outfile.write(dumps(row) + '\n')
            os.remove(item[0])  # extracted .dbf is only needed transiently
            print(item)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment