Skip to content

Instantly share code, notes, and snippets.

@erussell
Created July 25, 2012 14:59
Show Gist options
  • Save erussell/3176613 to your computer and use it in GitHub Desktop.
Save erussell/3176613 to your computer and use it in GitHub Desktop.
Python script to be run daily to maintain an ArcGIS mosaic dataset of Growing Degree Days, using tempertaure data from the Historical Climate Network
/data
/mosaic.gdb
/scratch.gdb
import arcpy, calendar, csv, datetime, httplib, io, json, logging, math, MySQLdb, os, re, sys, urllib
logger = logging.getLogger('fieldscope.gdd')
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.DEBUG)
class GDD (object):
db_conn = None
mosaic_dataset = None
mask_dataset = None
data_folder = None
def __init__ (self):
'''Set up the complete execution environment for this script'''
# Set up geoprocessing environment defaults
sr = arcpy.SpatialReference()
sr.loadFromString(r'PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0],AUTHORITY["EPSG",3857]]')
arcpy.env.outputCoordinateSystem = sr
arcpy.env.extent = arcpy.Extent(-20000000, 1800000, -7000000, 11600000)
arcpy.env.rasterStatistics = 'STATISTICS'
arcpy.env.overwriteOutput = True
arcpy.env.cellSize = 5000
root_folder = os.path.abspath(os.path.dirname(__file__))
mask_dataset = os.path.join(root_folder, 'mask.img')
if os.path.exists(mask_dataset):
self.mask_dataset = mask_dataset
arcpy.env.mask = mask_dataset
# Create a scratch geodatabase for storing intermediate results
scratch_gdb = os.path.join(root_folder, 'scratch.gdb')
if not os.path.exists(scratch_gdb):
logger.debug('creating scratch.gdb')
arcpy.management.CreateFileGDB(os.path.dirname(scratch_gdb), os.path.basename(scratch_gdb))
arcpy.env.scratchWorkspace = scratch_gdb
arcpy.env.workspace = scratch_gdb
# create a snap raster
snap_raster = os.path.join(scratch_gdb, 'snap')
if not arcpy.Exists(snap_raster):
logger.debug('creating %s' % snap_raster)
arcpy.management.CreateRasterDataset(os.path.dirname(snap_raster), os.path.basename(snap_raster), 5000, "8_BIT_UNSIGNED", sr, 1, "", "", "", "NONE", "")
arcpy.env.snapRaster = snap_raster
# Create a geodatabase to hold our output data
self.data_folder = os.path.join(root_folder, 'data')
if not os.path.exists(self.data_folder):
logger.debug('creating %s' % self.data_folder)
os.makedirs(self.data_folder)
mosaic_gdb = os.path.join(root_folder, 'mosaic.gdb')
if not os.path.exists(mosaic_gdb):
logger.debug('creating %s' % mosaic_gdb)
arcpy.management.CreateFileGDB(os.path.dirname(mosaic_gdb), os.path.basename(mosaic_gdb))
self.mosaic_dataset = os.path.join(mosaic_gdb, 'growing_degree_days')
if not arcpy.Exists(self.mosaic_dataset):
logger.debug('creating %s', self.mosaic_dataset)
arcpy.management.CreateMosaicDataset(os.path.dirname(self.mosaic_dataset), os.path.basename(self.mosaic_dataset), sr, 1, '16_BIT_UNSIGNED')
arcpy.management.AddField(self.mosaic_dataset, 'BeginDate', 'DATE')
arcpy.management.AddIndex(self.mosaic_dataset, 'BeginDate', 'GDB_3_BeginDate')
arcpy.management.AddField(self.mosaic_dataset, 'EndDate', 'DATE')
arcpy.management.AddIndex(self.mosaic_dataset, 'EndDate', 'GDB_3_EndDate')
self.db_conn = MySQLdb.connect('budburst.cbxoccr8gbls.us-east-1.rds.amazonaws.com', 'asp', 'FS4wsVc5', 'gdd')
def _get_raster_path (self, date):
year_folder = os.path.join(self.data_folder, '%04d' % date.year)
if not os.path.exists(year_folder):
os.makedirs(year_folder)
return os.path.join(year_folder, get_raster_name(date) + '.tif')
def store_temperatures (self, begin_date, end_date):
'''Download temperature data from National Climate Data Center's Global Historical Climate Network dataset'''
logger.debug('loading data between %s and %s from ncdc.noaa.gov', begin_date.isoformat(), end_date.isoformat())
db_cursor = self.db_conn.cursor()
db_cursor.execute('SELECT id FROM station;')
count = 0
for record in db_cursor.fetchall():
station_id = record[0]
try:
response = http_get('www1.ncdc.noaa.gov', '/pub/data/ghcn/daily/all/%s.dly' % station_id)
records = {}
for row in iter(response.splitlines()):
element = row[17:21].strip().lower()
if element != 'tmin' and element != 'tmax':
continue
year = int(row[11:15])
month = int(row[15:17])
end_of_month = datetime.date(year, month, 1) + datetime.timedelta(days=30)
beginning_of_month = datetime.date(year, month, 1) - datetime.timedelta(days=1)
if end_of_month < begin_date or beginning_of_month > end_date:
continue
days_in_month = calendar.monthrange(year, month)[1]
for day,index in enumerate(xrange(21, 262, 8), start=1):
if day > days_in_month:
continue
observation_date = datetime.date(year, month, day)
if observation_date not in records:
records[observation_date] = {}
if observation_date < begin_date:
continue
if observation_date > end_date:
break
celsius_tenths = int(row[index:index+5])
if celsius_tenths == -9999:
continue
fahrenheit = int(celsius_tenths * 0.9/5) + 32
records[observation_date][element] = fahrenheit
db_cursor.executemany('INSERT INTO temperature (station,date,tmin,tmax) VALUES (%s, %s, %s, %s) ON DUPLICATE KEY UPDATE tmin=VALUES(tmin),tmax=VALUES(tmax);',
[ (station_id, date, data['tmin'], data['tmax'])
for date, data in records.iteritems()
if data.get('tmin', None) and data.get('tmax', None) ])
count += db_cursor.rowcount
except:
logger.error('error loading data for station %s', station_id)
self.db_conn.commit()
db_cursor.close()
logger.debug('loaded data for %s stations', count)
def raster_exists (self, date):
return arcpy.Exists(self._get_raster_path(date))
def get_observation_count (self, date):
db_cursor = self.db_conn.cursor()
db_cursor.execute('SELECT COUNT(*) from temperature t WHERE t.date=%s', date)
result = db_cursor.fetchone()[0]
db_cursor.close()
return result
def create_raster (self, date, min_temp, max_temp):
'''Create a raster of growing degree days for the given date. Assumes
that temperature data for that date has already been loaded into the
database'''
logger.debug('creating raster for %s', date.isoformat())
feature_class = arcpy.management.CreateFeatureclass("in_memory", "temp", "POINT")
arcpy.management.AddField(feature_class, 'tmin', 'SHORT')
arcpy.management.AddField(feature_class, 'tmax', 'SHORT')
fc_cursor = arcpy.InsertCursor(feature_class)
point = arcpy.Point()
db_cursor = self.db_conn.cursor()
db_cursor.execute('SELECT s.x,s.y,t.tmax,t.tmin FROM temperature t INNER JOIN station s ON s.id=t.station WHERE t.date=%s', date)
record_count = 0
for record in db_cursor.fetchall():
point.X = record[0]
point.Y = record[1]
row = fc_cursor.newRow()
row.shape = point
row.tmax = record[2]
row.tmin = record[3]
fc_cursor.insertRow(row)
record_count += 1
db_cursor.close()
del fc_cursor
logger.debug('interpolating %s points', record_count)
arcpy.CheckOutExtension("Spatial")
tmax_ras = arcpy.sa.NaturalNeighbor(feature_class, 'tmax', 5000)
tmin_ras = arcpy.sa.NaturalNeighbor(feature_class, 'tmin', 5000)
temp_range = max_temp - min_temp
gdd_ras = arcpy.sa.Minus(arcpy.sa.Divide(arcpy.sa.Plus(tmax_ras, tmin_ras), 2), min_temp)
gdd_ras = arcpy.sa.Con(gdd_ras < 0, 0, gdd_ras)
gdd_ras = arcpy.sa.Con(gdd_ras > temp_range, temp_range, gdd_ras)
prev_day = date - datetime.timedelta(1)
prev_ras = self._get_raster_path(prev_day)
if arcpy.Exists(prev_ras) and (date.month != 1 or date.day != 1):
gdd_ras = arcpy.sa.Plus(gdd_ras, prev_ras)
if self.mask_dataset:
gdd_ras = arcpy.sa.ExtractByMask(gdd_ras, self.mask_dataset)
out_ras = self._get_raster_path(date)
arcpy.management.CopyRaster(gdd_ras, out_ras, "DEFAULTS", "", 65535, "", "", "16_BIT_UNSIGNED")
arcpy.management.Delete(feature_class)
arcpy.management.Delete(gdd_ras)
arcpy.CheckInExtension("Spatial")
# Remember how many records were used to create the raster
db_cursor = self.db_conn.cursor()
db_cursor.execute('INSERT INTO record_counts (date,count) VALUES (%s, %s) ON DUPLICATE KEY UPDATE count=VALUES(count)', (date, record_count))
self.db_conn.commit()
db_cursor.close()
return out_ras
def add_raster_to_mosaic (self, date):
'''Add the given growing degree day raster for the given date to the master
raster catalog, and mark it as beloning to that date'''
raster_name = get_raster_name(date)
rows = arcpy.UpdateCursor(self.mosaic_dataset, "Name = '%s'" % raster_name)
for row in rows:
logger.debug('removing existing raster %s', raster_name)
rows.deleteRow(row)
del rows
logger.debug('adding raster %s to mosaic', raster_name)
arcpy.management.AddRastersToMosaicDataset(self.mosaic_dataset, 'Raster Dataset', self._get_raster_path(date), \
'UPDATE_CELL_SIZES', 'UPDATE_BOUNDARY', 'NO_OVERVIEWS', \
'#', '#', '#', '#', '#', '#', '#', \
'BUILD_PYRAMIDS', 'CALCULATE_STATISTICS', 'BUILD_THUMBNAILS')
rows = arcpy.UpdateCursor(self.mosaic_dataset, "Name = '%s'" % raster_name)
end_date = date + datetime.timedelta(1)
for row in rows:
row.BeginDate = "%s/%s/%s" % (date.month, date.day, date.year,)
row.EndDate = "%s/%s/%s" % (end_date.month, end_date.day, end_date.year,)
rows.updateRow(row)
del rows
def update_mosaic_statistics (self):
logger.debug('updating mosaic statistics')
arcpy.management.CalculateStatistics(self.mosaic_dataset)
arcpy.management.BuildPyramidsandStatistics(self.mosaic_dataset, 'INCLUDE_SUBDIRECTORIES', 'BUILD_PYRAMIDS', 'CALCULATE_STATISTICS')
arcpy.RefreshCatalog(self.mosaic_dataset)
def get_raster_name (date):
return date.strftime('GDD_%Y%m%d')
def http_get (host, path):
conn = httplib.HTTPConnection(host)
conn.request('GET', path)
response = conn.getresponse()
if response.status == 200:
return response.read()
else:
raise httplib.HTTPException()
def main (argv=None):
'''Usage: <script> (<begin_date> (<end_date>))
create growing degree day rasters for each day between begin_date
(which defaults to five days ago) and end_date (which defaults to
today), inclusive. Dates should be given in YYYY-MM-DD format. Will
only create rasters for days that don't already have one, and only
if at least 3000 temperature observations are available.'''
begin_date = datetime.date.today() - datetime.timedelta(5)
end_date = datetime.date.today()
if argv is not None and len(argv) > 0:
begin_date = datetime.datetime.strptime(argv[0], '%Y-%m-%d').date()
if argv is not None and len(argv) > 1:
end_date = datetime.datetime.strptime(argv[1], '%Y-%m-%d').date()
if end_date < begin_date:
raise Exception('begin_date must be before end_date')
processor = GDD()
if begin_date.month != 1 or begin_date.day != 1:
prev_date = begin_date - datetime.timedelta(1)
if not processor.raster_exists(prev_date):
raise Exception('beginning date %s is not the first day of year and previous day has no data' % begin_date.isoformat())
logger.info('running gdd script for %s to %s' % (begin_date.isoformat(), end_date.isoformat()))
while processor.raster_exists(begin_date) and begin_date <= end_date:
logger.debug('raster already exists for %s' % begin_date)
begin_date = begin_date + datetime.timedelta(1)
if begin_date > end_date:
return 0
processor.store_temperatures(begin_date, end_date)
current_date = begin_date
while current_date <= end_date:
if processor.get_observation_count(current_date) < 3000:
logger.debug('insufficient data to create raster for %s' % current_date)
break
processor.create_raster(current_date, 50, 86)
processor.add_raster_to_mosaic(current_date)
current_date = current_date + datetime.timedelta(1)
processor.update_mosaic_statistics()
return 0
if __name__ == "__main__":
status = main(sys.argv[1:])
sys.exit(0)
CREATE TABLE station (id VARCHAR(11) NOT NULL, x INT NOT NULL, y INT NOT NULL, PRIMARY KEY (id));
CREATE TABLE temperature (station VARCHAR(11) NOT NULL REFERENCES station(id),
tmin INT NOT NULL,
tmax INT NOT NULL,
date DATE NOT NULL,
PRIMARY KEY (station,date));
CREATE INDEX temperature_station_index ON temperature (station);
CREATE INDEX temperature_date_index ON temperature (date);
CREATE TABLE record_counts (date DATE NOT NULL, count INT NOT NULL, PRIMARY KEY (date));
<PAMDataset>
<Metadata>
<MDI key="DataType">Thematic</MDI>
</Metadata>
<Metadata domain="Esri">
<MDI key="PyramidResamplingType">NEAREST</MDI>
</Metadata>
<PAMRasterBand band="1">
<Description>Layer_1</Description>
<GDALRasterAttributeTable>
<FieldDefn index="0">
<Name>Rowid</Name>
<Type>0</Type>
<Usage>0</Usage>
</FieldDefn>
<FieldDefn index="1">
<Name>VALUE</Name>
<Type>0</Type>
<Usage>0</Usage>
</FieldDefn>
<FieldDefn index="2">
<Name>COUNT</Name>
<Type>0</Type>
<Usage>0</Usage>
</FieldDefn>
<Row index="0">
<F>0</F>
<F>0</F>
<F>854363</F>
</Row>
</GDALRasterAttributeTable>
<Metadata domain="IMAGE_STRUCTURE">
<MDI key="COMPRESSION">RLE</MDI>
</Metadata>
<Metadata>
<MDI key="RepresentationType">THEMATIC</MDI>
<MDI key="LAYER_TYPE">athematic</MDI>
<MDI key="STATISTICS_MINIMUM">0</MDI>
<MDI key="STATISTICS_MAXIMUM">0</MDI>
<MDI key="STATISTICS_MEAN">0</MDI>
<MDI key="STATISTICS_STDDEV">0</MDI>
</Metadata>
</PAMRasterBand>
</PAMDataset>
paWVALUEN COUNTN 0 854363
<?xml version="1.0" encoding="UTF-8"?>
<metadata xml:lang="en"><Esri><CreaDate>20120801</CreaDate><CreaTime>12502600</CreaTime><ArcGISFormat>1.0</ArcGISFormat><SyncOnce>FALSE</SyncOnce><DataProperties><itemProps><itemName Sync="TRUE">mask.img</itemName><itemLocation><linkage Sync="TRUE">file://C:\FieldScope\tasks\gdd\mask.img</linkage><protocol Sync="TRUE">Local Area Network</protocol></itemLocation><imsContentType Sync="TRUE">002</imsContentType><nativeExtBox><westBL Sync="TRUE">-20002507.842788</westBL><eastBL Sync="TRUE">-6997507.842788</eastBL><southBL Sync="TRUE">1795971.458386</southBL><northBL Sync="TRUE">11600971.458386</northBL><exTypeCode Sync="TRUE">1</exTypeCode></nativeExtBox></itemProps><RasterProperties><General><PixelDepth Sync="TRUE">8</PixelDepth><HasColormap Sync="TRUE">FALSE</HasColormap><CompressionType Sync="TRUE">RLE</CompressionType><NumBands Sync="TRUE">1</NumBands><Format Sync="TRUE">IMAGINE Image</Format><HasPyramids Sync="TRUE">TRUE</HasPyramids><SourceType Sync="TRUE">discrete</SourceType><PixelType Sync="TRUE">unsigned integer</PixelType><NoDataValue Sync="TRUE">255</NoDataValue></General></RasterProperties><lineage><Process ToolSource="c:\program files (x86)\arcgis\desktop10.1\ArcToolbox\Toolboxes\Data Management Tools.tbx\CopyRaster" Date="20120801" Time="125026">CopyRaster C:\FieldScope\tasks\gdd\t_t810 C:\FieldScope\tasks\gdd\mask.img # # 255 NONE NONE 8_BIT_UNSIGNED NONE NONE</Process></lineage></DataProperties><SyncDate>20120801</SyncDate><SyncTime>12502600</SyncTime><ModDate>20120801</ModDate><ModTime>12502600</ModTime></Esri><dataIdInfo><envirDesc Sync="TRUE">Microsoft Windows Server 2008 R2 Version 6.1 (Build 7601) Service Pack 1; Esri ArcGIS 10.1.0.3035</envirDesc><dataLang><languageCode value="eng" Sync="TRUE"></languageCode><countryCode value="USA" Sync="TRUE"></countryCode></dataLang><idCitation><resTitle Sync="TRUE">mask.img</resTitle><presForm><PresFormCd value="005" Sync="TRUE"></PresFormCd></presForm></idCitation><spatRpType><SpatRepTypCd value="002" Sync="TRUE"></SpatRepTypCd></spatRpType></dataIdInfo><mdLang><languageCode value="eng" Sync="TRUE"></languageCode><countryCode value="USA" Sync="TRUE"></countryCode></mdLang><mdChar><CharSetCd value="004" Sync="TRUE"></CharSetCd></mdChar><distInfo><distFormat><formatName Sync="TRUE">Raster Dataset</formatName></distFormat></distInfo><mdHrLv><ScopeCd value="005" Sync="TRUE"></ScopeCd></mdHrLv><mdHrLvName Sync="TRUE">dataset</mdHrLvName><spatRepInfo><Georect><cellGeo><CellGeoCd Sync="TRUE" value="002"></CellGeoCd></cellGeo><numDims Sync="TRUE">2</numDims><tranParaAv Sync="TRUE">1</tranParaAv><chkPtAv Sync="TRUE">0</chkPtAv><cornerPts><pos Sync="TRUE">-20002507.842788 1795971.458386</pos></cornerPts><cornerPts><pos Sync="TRUE">-20002507.842788 11600971.458386</pos></cornerPts><cornerPts><pos Sync="TRUE">-6997507.842788 11600971.458386</pos></cornerPts><cornerPts><pos Sync="TRUE">-6997507.842788 1795971.458386</pos></cornerPts><centerPt><pos Sync="TRUE">-13500007.842788 6698471.458386</pos></centerPt><axisDimension type="002"><dimSize Sync="TRUE">2601</dimSize><dimResol><value Sync="TRUE">5000.000000</value></dimResol></axisDimension><axisDimension type="001"><dimSize Sync="TRUE">1961</dimSize><dimResol><value Sync="TRUE">5000.000000</value></dimResol></axisDimension><ptInPixel><PixOrientCd Sync="TRUE" value="001"></PixOrientCd></ptInPixel></Georect></spatRepInfo><eainfo><detailed Name="mask.img.vat"><enttyp><enttypl Sync="TRUE">mask.img.vat</enttypl><enttypt Sync="TRUE">Table</enttypt><enttypc Sync="TRUE">1</enttypc></enttyp><attr><attrlabl Sync="TRUE">OID</attrlabl><attalias Sync="TRUE">OID</attalias><attrtype Sync="TRUE">OID</attrtype><attwidth Sync="TRUE">4</attwidth><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale><attrdef Sync="TRUE">Internal feature number.</attrdef><attrdefs Sync="TRUE">Esri</attrdefs><attrdomv><udom Sync="TRUE">Sequential unique whole numbers that are automatically generated.</udom></attrdomv></attr><attr><attrlabl Sync="TRUE">VALUE</attrlabl><attalias Sync="TRUE">VALUE</attalias><attrtype Sync="TRUE">Integer</attrtype><attwidth Sync="TRUE">9</attwidth><atprecis Sync="TRUE">9</atprecis><attscale Sync="TRUE">0</attscale></attr><attr><attrlabl Sync="TRUE">COUNT</attrlabl><attalias Sync="TRUE">COUNT</attalias><attrtype Sync="TRUE">Integer</attrtype><attwidth Sync="TRUE">9</attwidth><atprecis Sync="TRUE">9</atprecis><attscale Sync="TRUE">0</attscale></attr></detailed></eainfo><contInfo><ImgDesc><contentTyp><ContentTypCd Sync="TRUE" value="002"></ContentTypCd></contentTyp><covDim><Band><dimDescrp Sync="TRUE">Layer_1</dimDescrp><bitsPerVal Sync="TRUE">8</bitsPerVal></Band></covDim></ImgDesc></contInfo><mdDateSt Sync="TRUE">20120801</mdDateSt></metadata>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment