Last active
September 14, 2017 16:01
-
-
Save mwengren/5a4e7446b925067a2d96b098c0c99ae1 to your computer and use it in GitHub Desktop.
hpcc_ogr.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re,sys,os,getopt,urllib2,datetime | |
import zipfile | |
from osgeo import ogr | |
def main(): | |
# set environ['HPCCSCRIPTDIR'] to '.' if not set previously: | |
if "HPCCSCRIPTDIR" not in os.environ: | |
os.environ['HPCCSCRIPTDIR'] = "/var/lib/geoserver/data_ingest" | |
print("setting 'HPCCSCRIPTDIR' to: " + os.environ['HPCCSCRIPTDIR']) | |
#change current working directory to 'SCRIPTDIR' if present (for cron support) | |
if "SCRIPTDIR" in os.environ: | |
os.chdir(os.environ['SCRIPTDIR']) | |
(year,basin,stormnum)=getopts() | |
stormnum_formatted = "{0:02d}".format(stormnum) | |
nhc_download_url="http://www.nhc.noaa.gov/gis/best_track/" | |
archive_name= basin + str(stormnum_formatted) + str(year) + "_best_track.zip" | |
download_status,last_modified = download_archive(nhc_download_url,archive_name) | |
if download_status == False: | |
sys.exit(2) | |
else: | |
#determine the last-modified timedelta, and 'last_modified_hours' number | |
# of hours since the last modified time of the archive file. Used to determine if processing needed. | |
last_modified_age = datetime.datetime.now() - last_modified | |
#last_modified_hours = (last_modified_age.seconds / 3600).round(0) | |
last_modified_hours = (last_modified_age.days * 24) + (last_modified_age.seconds / 3600) | |
print("last_modified_hours: " + str(last_modified_hours)) | |
#perform source shapefile processing and data upload only if last_modified_hours is within 24 hours to current time: | |
if last_modified_hours < 24: | |
#if True: | |
print("The last-modified time value: " + str(last_modified_hours) + " is below 24 hour threshold for data update") | |
print("processing the archive file " + archive_name + " and uploading to GeoServer to update data store") | |
zip_file = zipfile.ZipFile("download/" + archive_name,'r') | |
zip_file.extractall("download") | |
zip_file.close() | |
#new_shape_datatypes: a dictionary mapping archive shapefile suffixes to new suffixes to use in renaming rules below: | |
#archive shape names -> new names to upload to PostGIS tables: | |
# al012012_lin -> nhc_prelim_besttrack_track | |
# al012012_pts -> nhc_prelim_besttrack_points | |
# al012012_radii -> nhc_prelim_besttrack_radii | |
# al012012_windswath -> nhc_prelim_besttrack_windswath | |
new_shape_datatypes = {'lin':'track','pts':'points','radii':'radii','windswath':'windswath'} | |
#list of the new output shapefile names from regexp replace below: | |
new_shapefile_names = ["nhc_prelim_besttrack_points.shp","nhc_prelim_besttrack_radii.shp","nhc_prelim_besttrack_track.shp","nhc_prelim_besttrack_windswath.shp"] | |
#change working dir to 'download' | |
os.chdir("download") | |
for key in sorted(new_shape_datatypes.keys()): | |
#regexp: al012012_ + key | |
regexp = basin + stormnum_formatted + str(year) + "_" + key | |
#rename shapefile files according to above name dictionary | |
for filename in os.listdir("."): | |
if re.match(regexp,filename): | |
#newfilename = "nhc_prelim_besttrack_" + track|points|radii|windswath + ext | |
newfilename = "nhc_prelim_besttrack_" + re.sub(regexp,new_shape_datatypes[key],filename) | |
print("rename " + filename + " to: " + newfilename) | |
os.rename(filename,newfilename) | |
#fields to add to each source shapefile (missing in source attribution): | |
# nhc_prelim_besttrack_track: STORMID, BASIN, YEAR | |
# nhc_prelim_besttrack_points: STORMID | |
# nhc_prelim_besttrack_radii: ? | |
# nhc_prelim_besttrack_windswath: YEAR | |
# STORMID ex: al012012 | |
#loop through each renamed shapefile, check to see field name is present, | |
# if missing, add via CreateNewField() | |
for shapefile_name in new_shapefile_names: | |
source = ogr.Open(os.environ['SCRIPTDIR'] + "/download/" + shapefile_name, 1) | |
#need to check that the shapefile exists in the archive, if not, log a message and skip: | |
try: | |
layer = source.GetLayer() | |
except AttributeError: | |
print("Shapefile: " + shapefile_name + " does not exist in archive " + archive_name) | |
continue | |
layer_defn = layer.GetLayerDefn() | |
field_names = [layer_defn.GetFieldDefn(i).GetName() for i in range(layer_defn.GetFieldCount())] | |
print("Shapefile: " + shapefile_name + " field count: " + str(len(field_names))) | |
print ("Fieldnames: " + str(field_names)) | |
field_names_added = [] | |
#add new fields if not already present, and assign values: | |
if not 'STORMID' in field_names: | |
new_field = ogr.FieldDefn('STORMID', ogr.OFTString) | |
layer.CreateField(new_field) | |
field_names_added.append('STORMID') | |
if not 'YEAR' in field_names: | |
new_field = ogr.FieldDefn('YEAR', ogr.OFTInteger) | |
layer.CreateField(new_field) | |
field_names_added.append('YEAR') | |
if not 'BASIN' in field_names: | |
new_field = ogr.FieldDefn('BASIN', ogr.OFTString) | |
layer.CreateField(new_field) | |
field_names_added.append('BASIN') | |
if not 'STORMNUM' in field_names: | |
new_field = ogr.FieldDefn('STORMNUM', ogr.OFTInteger) | |
layer.CreateField(new_field) | |
field_names_added.append('STORMNUM') | |
if not 'LASTMOD' in field_names: | |
new_field = ogr.FieldDefn('LASTMOD', ogr.OFTString) | |
layer.CreateField(new_field) | |
field_names_added.append('LASTMOD') | |
layer_defn = layer.GetLayerDefn() | |
field_names = [layer_defn.GetFieldDefn(i).GetName() for i in range(layer_defn.GetFieldCount())] | |
print ("Resulting Fieldnames: " + str(field_names)) | |
#create a 'last_modified_string' with specified syntax to represent the 'last-modifed' value determined via HTTP header during download. | |
# this will be added under the 'LASTMOD' field and used in SQL view generation in GeoServer | |
last_modified_string = "{0:04d}".format(last_modified.year) + "{0:02d}".format(last_modified.month) + "{0:02d}".format(last_modified.day) + "/" + "{0:02d}".format(last_modified.hour) + "{0:02d}".format(last_modified.minute) | |
print("last_modified_string: " + last_modified_string) | |
#populate newly created fields (using field_names_added[] list to detect which were added for | |
# each source shapefile | |
i = 0 | |
while i < layer.GetFeatureCount(): | |
feature = layer.GetFeature(i) | |
if 'STORMID' in field_names_added: feature.SetField('STORMID',basin + stormnum_formatted + str(year)) | |
if 'YEAR' in field_names_added: feature.SetField('YEAR',year) | |
if 'BASIN' in field_names_added: feature.SetField('BASIN',basin.upper()) | |
if 'STORMNUM' in field_names_added: feature.SetField('STORMNUM',basin) | |
if 'LASTMOD' in field_names_added: feature.SetField('LASTMOD',last_modified_string) | |
layer.SetFeature(feature) | |
i = i + 1 | |
#debug output of resulting attrubution: | |
print("Shapefile: " + shapefile_name + " feature count: " + str(layer.GetFeatureCount()) + ". Debug values:") | |
i = 0 | |
while i < layer.GetFeatureCount(): | |
feature = layer.GetFeature(i) | |
print("STORMID:" + feature.GetFieldAsString('STORMID') + " YEAR:" + feature.GetFieldAsString('YEAR') + " BASIN:" + feature.GetFieldAsString('BASIN') + " STORMNUM:" + feature.GetFieldAsString('STORMNUM') + " LAST_MODIFIED:" + feature.GetFieldAsString('LASTMOD')) | |
i = i + 1 | |
# Close the Shapefile | |
source = None | |
#add all of the files matching any of the new shapefile names in new_shape_names in download dir to .zip file for upload: | |
uploadzipfilename = "upload_nhc_besttrack.zip" | |
zip_file = zipfile.ZipFile(uploadzipfilename,'w',zipfile.ZIP_DEFLATED) | |
#loop through all files in 'download/' dir to match against new_shape_names.values() | |
for filename in os.listdir("."): | |
for shapefile_name in new_shapefile_names: | |
shape_prefix = shapefile_name.split(".")[0] | |
if re.match(shape_prefix,filename): | |
zip_file.write(filename) | |
zip_file.close() | |
print("Created archive file: " + uploadzipfilename + " for data sets to upload") | |
#remove the modified shapefile source files from 'download' dir: | |
for filename in os.listdir("."): | |
for shapefile_name in new_shapefile_names: | |
shape_prefix = shapefile_name.split(".")[0] | |
if re.match(shape_prefix,filename): | |
os.remove(filename) | |
os.chdir("../") | |
upload_archive(uploadzipfilename) | |
os.remove("download/" + uploadzipfilename) | |
#create and update SQL views for this storm. Note: both create and update are | |
# needed because it is unknown whether the particular SQL views have already been created. | |
# If a create call is issued via REST API, it will error out and the bounds may not be updated. | |
for type in sorted(new_shape_datatypes.values()): | |
create_new_sql_view(year,basin,stormnum,type) | |
update_sql_view(year,basin,stormnum,type) | |
#set styles for SQL views: | |
for type in sorted(new_shape_datatypes.values()): | |
set_layer_style(year,basin,stormnum,type) | |
else: | |
print("Last-modified time is " + str(last_modified_hours) + " hours from current time, this is above the limit of 24 to cause data upload. No new data will be uploaded for this storm.") | |
sys.exit(0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment