Skip to content

Instantly share code, notes, and snippets.

@mwengren
Last active September 14, 2017 16:01
Show Gist options
  • Save mwengren/5a4e7446b925067a2d96b098c0c99ae1 to your computer and use it in GitHub Desktop.
Save mwengren/5a4e7446b925067a2d96b098c0c99ae1 to your computer and use it in GitHub Desktop.
hpcc_ogr.py
import re,sys,os,getopt,urllib2,datetime
import zipfile
from osgeo import ogr
def main():
# set environ['HPCCSCRIPTDIR'] to '.' if not set previously:
if "HPCCSCRIPTDIR" not in os.environ:
os.environ['HPCCSCRIPTDIR'] = "/var/lib/geoserver/data_ingest"
print("setting 'HPCCSCRIPTDIR' to: " + os.environ['HPCCSCRIPTDIR'])
#change current working directory to 'SCRIPTDIR' if present (for cron support)
if "SCRIPTDIR" in os.environ:
os.chdir(os.environ['SCRIPTDIR'])
(year,basin,stormnum)=getopts()
stormnum_formatted = "{0:02d}".format(stormnum)
nhc_download_url="http://www.nhc.noaa.gov/gis/best_track/"
archive_name= basin + str(stormnum_formatted) + str(year) + "_best_track.zip"
download_status,last_modified = download_archive(nhc_download_url,archive_name)
if download_status == False:
sys.exit(2)
else:
#determine the last-modified timedelta, and 'last_modified_hours' number
# of hours since the last modified time of the archive file. Used to determine if processing needed.
last_modified_age = datetime.datetime.now() - last_modified
#last_modified_hours = (last_modified_age.seconds / 3600).round(0)
last_modified_hours = (last_modified_age.days * 24) + (last_modified_age.seconds / 3600)
print("last_modified_hours: " + str(last_modified_hours))
#perform source shapefile processing and data upload only if last_modified_hours is within 24 hours to current time:
if last_modified_hours < 24:
#if True:
print("The last-modified time value: " + str(last_modified_hours) + " is below 24 hour threshold for data update")
print("processing the archive file " + archive_name + " and uploading to GeoServer to update data store")
zip_file = zipfile.ZipFile("download/" + archive_name,'r')
zip_file.extractall("download")
zip_file.close()
#new_shape_datatypes: a dictionary mapping archive shapefile suffixes to new suffixes to use in renaming rules below:
#archive shape names -> new names to upload to PostGIS tables:
# al012012_lin -> nhc_prelim_besttrack_track
# al012012_pts -> nhc_prelim_besttrack_points
# al012012_radii -> nhc_prelim_besttrack_radii
# al012012_windswath -> nhc_prelim_besttrack_windswath
new_shape_datatypes = {'lin':'track','pts':'points','radii':'radii','windswath':'windswath'}
#list of the new output shapefile names from regexp replace below:
new_shapefile_names = ["nhc_prelim_besttrack_points.shp","nhc_prelim_besttrack_radii.shp","nhc_prelim_besttrack_track.shp","nhc_prelim_besttrack_windswath.shp"]
#change working dir to 'download'
os.chdir("download")
for key in sorted(new_shape_datatypes.keys()):
#regexp: al012012_ + key
regexp = basin + stormnum_formatted + str(year) + "_" + key
#rename shapefile files according to above name dictionary
for filename in os.listdir("."):
if re.match(regexp,filename):
#newfilename = "nhc_prelim_besttrack_" + track|points|radii|windswath + ext
newfilename = "nhc_prelim_besttrack_" + re.sub(regexp,new_shape_datatypes[key],filename)
print("rename " + filename + " to: " + newfilename)
os.rename(filename,newfilename)
#fields to add to each source shapefile (missing in source attribution):
# nhc_prelim_besttrack_track: STORMID, BASIN, YEAR
# nhc_prelim_besttrack_points: STORMID
# nhc_prelim_besttrack_radii: ?
# nhc_prelim_besttrack_windswath: YEAR
# STORMID ex: al012012
#loop through each renamed shapefile, check to see field name is present,
# if missing, add via CreateNewField()
for shapefile_name in new_shapefile_names:
source = ogr.Open(os.environ['SCRIPTDIR'] + "/download/" + shapefile_name, 1)
#need to check that the shapefile exists in the archive, if not, log a message and skip:
try:
layer = source.GetLayer()
except AttributeError:
print("Shapefile: " + shapefile_name + " does not exist in archive " + archive_name)
continue
layer_defn = layer.GetLayerDefn()
field_names = [layer_defn.GetFieldDefn(i).GetName() for i in range(layer_defn.GetFieldCount())]
print("Shapefile: " + shapefile_name + " field count: " + str(len(field_names)))
print ("Fieldnames: " + str(field_names))
field_names_added = []
#add new fields if not already present, and assign values:
if not 'STORMID' in field_names:
new_field = ogr.FieldDefn('STORMID', ogr.OFTString)
layer.CreateField(new_field)
field_names_added.append('STORMID')
if not 'YEAR' in field_names:
new_field = ogr.FieldDefn('YEAR', ogr.OFTInteger)
layer.CreateField(new_field)
field_names_added.append('YEAR')
if not 'BASIN' in field_names:
new_field = ogr.FieldDefn('BASIN', ogr.OFTString)
layer.CreateField(new_field)
field_names_added.append('BASIN')
if not 'STORMNUM' in field_names:
new_field = ogr.FieldDefn('STORMNUM', ogr.OFTInteger)
layer.CreateField(new_field)
field_names_added.append('STORMNUM')
if not 'LASTMOD' in field_names:
new_field = ogr.FieldDefn('LASTMOD', ogr.OFTString)
layer.CreateField(new_field)
field_names_added.append('LASTMOD')
layer_defn = layer.GetLayerDefn()
field_names = [layer_defn.GetFieldDefn(i).GetName() for i in range(layer_defn.GetFieldCount())]
print ("Resulting Fieldnames: " + str(field_names))
#create a 'last_modified_string' with specified syntax to represent the 'last-modifed' value determined via HTTP header during download.
# this will be added under the 'LASTMOD' field and used in SQL view generation in GeoServer
last_modified_string = "{0:04d}".format(last_modified.year) + "{0:02d}".format(last_modified.month) + "{0:02d}".format(last_modified.day) + "/" + "{0:02d}".format(last_modified.hour) + "{0:02d}".format(last_modified.minute)
print("last_modified_string: " + last_modified_string)
#populate newly created fields (using field_names_added[] list to detect which were added for
# each source shapefile
i = 0
while i < layer.GetFeatureCount():
feature = layer.GetFeature(i)
if 'STORMID' in field_names_added: feature.SetField('STORMID',basin + stormnum_formatted + str(year))
if 'YEAR' in field_names_added: feature.SetField('YEAR',year)
if 'BASIN' in field_names_added: feature.SetField('BASIN',basin.upper())
if 'STORMNUM' in field_names_added: feature.SetField('STORMNUM',basin)
if 'LASTMOD' in field_names_added: feature.SetField('LASTMOD',last_modified_string)
layer.SetFeature(feature)
i = i + 1
#debug output of resulting attrubution:
print("Shapefile: " + shapefile_name + " feature count: " + str(layer.GetFeatureCount()) + ". Debug values:")
i = 0
while i < layer.GetFeatureCount():
feature = layer.GetFeature(i)
print("STORMID:" + feature.GetFieldAsString('STORMID') + " YEAR:" + feature.GetFieldAsString('YEAR') + " BASIN:" + feature.GetFieldAsString('BASIN') + " STORMNUM:" + feature.GetFieldAsString('STORMNUM') + " LAST_MODIFIED:" + feature.GetFieldAsString('LASTMOD'))
i = i + 1
# Close the Shapefile
source = None
#add all of the files matching any of the new shapefile names in new_shape_names in download dir to .zip file for upload:
uploadzipfilename = "upload_nhc_besttrack.zip"
zip_file = zipfile.ZipFile(uploadzipfilename,'w',zipfile.ZIP_DEFLATED)
#loop through all files in 'download/' dir to match against new_shape_names.values()
for filename in os.listdir("."):
for shapefile_name in new_shapefile_names:
shape_prefix = shapefile_name.split(".")[0]
if re.match(shape_prefix,filename):
zip_file.write(filename)
zip_file.close()
print("Created archive file: " + uploadzipfilename + " for data sets to upload")
#remove the modified shapefile source files from 'download' dir:
for filename in os.listdir("."):
for shapefile_name in new_shapefile_names:
shape_prefix = shapefile_name.split(".")[0]
if re.match(shape_prefix,filename):
os.remove(filename)
os.chdir("../")
upload_archive(uploadzipfilename)
os.remove("download/" + uploadzipfilename)
#create and update SQL views for this storm. Note: both create and update are
# needed because it is unknown whether the particular SQL views have already been created.
# If a create call is issued via REST API, it will error out and the bounds may not be updated.
for type in sorted(new_shape_datatypes.values()):
create_new_sql_view(year,basin,stormnum,type)
update_sql_view(year,basin,stormnum,type)
#set styles for SQL views:
for type in sorted(new_shape_datatypes.values()):
set_layer_style(year,basin,stormnum,type)
else:
print("Last-modified time is " + str(last_modified_hours) + " hours from current time, this is above the limit of 24 to cause data upload. No new data will be uploaded for this storm.")
sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment