{
  "remotewcsurl": "http://neso.cryoland.enveo.at/cryoland/ows?service=wcs&version=2.0.0&",
  "localwcsurl": "http://earthserver.eox.at/eoxserver/ows?service=wcs&version=2.0.0&",
  "datasetseries": {
    "daily_FSC_Baltic_Optical": {
      "local_dir": "/home/ungarj/temp/getSnowMaps/CRYOLAND/daily_FSC_Baltic_Optical/",
      "collection": "daily_FSC_Baltic_Optical",
      "datatype": "GreyCube:GreySet3"
    }
  },
  "datasetseries_commented": {
    "multitemp_FSC_Scandinavia_Optical": {
      "local_dir": "/media/data/transfer/CRYOLAND/multitemp_FSC_Scandinavia_Optical/",
      "collection": "multitemp_FSC_Scandinavia_Optical",
      "datatype": "GreyCube:GreySet3"
    },
    "daily_SWE_PanEuropean_Microwave": {
      "local_dir": "/media/data/transfer/CRYOLAND/daily_SWE_PanEuropean_Microwave/",
      "collection": "daily_SWE_PanEuropean_Microwave",
      "datatype": "GreyCube:GreySet3"
    },
    "daily_FSC_PanEuropean_Optical": {
      "local_dir": "/media/data/transfer/CRYOLAND/daily_FSC_PanEuropean_Optical/",
      "collection": "daily_FSC_PanEuropean_Optical",
      "datatype": "GreyCube:GreySet3",
      "xres": "12200",
      "yres": "7400"
    },
    "daily_SCAW_Scandinavia_Radar": {
      "local_dir": "/media/data/transfer/CRYOLAND/daily_SCAW_Scandinavia_Radar/",
      "collection": "daily_SCAW_Scandinavia_Radar",
      "datatype": "GreyCube:GreySet3"
    },
    "daily_FSC_Alps_Optical": {
      "local_dir": "/media/data/transfer/CRYOLAND/daily_FSC_Alps_Optical/",
      "collection": "daily_FSC_Alps_Optical",
      "datatype": "GreyCube:GreySet3"
    },
    "daily_LIE_Baltic_Optical": {
      "local_dir": "/media/data/transfer/CRYOLAND/daily_LIE_Baltic_Optical/",
      "collection": "daily_LIE_Baltic_Optical",
      "datatype": "GreyCube:GreySet3"
    },
    "daily_FSC_PanEuropean_Optical_Uncertainty": {
      "local_dir": "/media/data/transfer/CRYOLAND/daily_FSC_PanEuropean_Optical_Uncertainty/",
      "collection": "daily_FSC_PanEuropean_Optical_Uncertainty",
      "datatype": "GreyCube:GreySet3"
    },
    "multitemp_FSC_Scandinavia_Optical_Radar": {
      "local_dir": "/media/data/transfer/CRYOLAND/multitemp_FSC_Scandinavia_Optical_Radar/",
      "collection": "multitemp_FSC_Scandinavia_Optical_Radar",
      "datatype": "GreyCube:GreySet3"
    }
  },
  "storagedir": "/home/ungarj/temp/getSnowMaps/",
  "ansi_date_start": "datetime(1601, 1, 1)",
  "unix_date_start": "datetime(1970, 1, 1)",
  "crs1": "http://kahlua.eecs.jacobs-university.de:8080/def/crs/EPSG/0/4326",
  "crs2": "http://kahlua.eecs.jacobs-university.de:8080/def/crs/OGC/0/Temporal?epoch=%221970-01-01T00:00:00Z%22%26uom=%22s%22%26label=%22unix%22",
  "tiling": "tiling aligned [0:511,0:511,0:0] tile size 262144"
}
#!/usr/bin/env python
# Copyright (c) 2013
#
# Joachim Ungar (ju@eox.at)
# Fabian Schindler (fs@eox.at)
# Oliver Clements (olcl@pml.ac.uk)
#
# This code was developed during the EU e-Infrastructure funded project
# EarthServer (www.earthserver.eu, Project Number: 283610).
#
# Some parts were used from a script developed by Oliver Clements:
# https://gist.github.com/doclements/5f3a29ee94e6ed9978bb
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
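# Example invocation (a sketch; the script and config file names are
# illustrative, not defined by this gist). The script takes the JSON config
# shown above via --config plus the number of days to look back as a
# positional argument:
#
#   python cryoland_sync.py --config cryoland.json 7
#
# downloads, ingests and registers all coverages of the last 7 days, while
#
#   python cryoland_sync.py --config cryoland.json --no-ingest --no-register 7
#
# only downloads them.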
import urllib2
import os
import subprocess
import sys
import argparse
import pickle
import fnmatch
import tempfile
import json
from osgeo import gdal
from datetime import datetime, timedelta, date, time
from xml.dom import minidom
# Configuration
# =============
ansi_date_start = datetime(1601, 1, 1)
unix_date_start = datetime(1970, 1, 1)
remote_file_list = []
diff_list = []
describe_eo_coverage_set_url = ""
get_coverage_url = ""
def main(args):
parser = argparse.ArgumentParser()
parser.add_argument("--no-download", dest="download", default=True, action="store_false")
parser.add_argument("--no-ingest", dest="ingest", default=True, action="store_false")
parser.add_argument("--no-register", dest="register", default=True, action="store_false")
parser.add_argument("--config", dest="config")
parser.add_argument("timediff", nargs=1, type=int)
parsed = parser.parse_args(args)
download = parsed.download
ingest = parsed.ingest
register = parsed.register
timediff = parsed.timediff[0]
# load config
with open(parsed.config, "r") as f:
config = json.load(f)
remotewcsurl = config["remotewcsurl"]
localwcsurl = config["localwcsurl"]
datasetseries = config["datasetseries"]
storagedir = config["storagedir"]
tiling = config["tiling"]
crs1 = config["crs1"]
crs2 = config["crs2"]
#print "ansi_date_start " + str(ansi_date_start)
#print "unix_date_start " + str(unix_date_start)
#print "remotewcsurl " + remotewcsurl
#print "localwcsurl " + localwcsurl
#print "datasetseries " + str(datasetseries)
#print "storagedir " + storagedir
#print "tiling " + tiling
print ("\n")
print datetime.now()
print ("==========")
print ("\n")
for i in datasetseries:
diff_list = []
remote_file_list = []
local_file_list = []
local_dir = datasetseries[i]["local_dir"]
dataset_series_id = i
collection = datasetseries[i]["collection"]
describe_eo_coverage_set_url = remotewcsurl + "request=describeeocoverageset&eoid=" + i
if "xres" in datasetseries[i].keys():
xres = int(datasetseries[i]["xres"])
yres = int(datasetseries[i]["yres"])
else:
xres = 0
yres = 0
if not os.path.exists(local_dir):
os.makedirs(local_dir)
local_file_list = get_local_filelist(local_dir)
time_offset = int(timediff)
print (collection + ":")
print ("-------------------------------")
print ("\n")
try:
if download:
print "download"
# create DescribeEOCoverageSet URL
describe_eo_coverage_set_url = remotewcsurl + "request=describeeocoverageset&eoid=" + i
# make DescribeEOCoverage request
describe_eo_coverage_set_xml = describe_eo_coverage_set(describe_eo_coverage_set_url,time_offset)
# parse response and get list of remote files
remote_file_list = parse_xml(describe_eo_coverage_set_xml)
# compare remote and local files and determine files to be downloaded
diff_list = compare_files(remote_file_list,local_file_list)
#print (str(len(remote_file_list)) + " item(s) since " + str(TIMEOFFSET) + " days")
print (str(len(remote_file_list)) + " item(s) within the last " + str(time_offset) + " days")
print (str(len(diff_list)) + " new")
# download files
download_files(remotewcsurl,diff_list,local_dir)
if ingest:
print "ingest"
# import in rasdaman
datatype = datasetseries[i]["datatype"]
import_files(local_dir,collection,datatype,crs1,crs2,xres,yres,storagedir,remotewcsurl,tiling)
if register:
print "register"
# create DescribeEOCoverageSet URL
describe_eo_coverage_set_url = remotewcsurl + "request=describeeocoverageset&eoid=" + i
# make DescribeEOCoverage request
describe_eo_coverage_set_xml = describe_eo_coverage_set(describe_eo_coverage_set_url,time_offset)
# parse response and get list of remote files
#remote_file_list = parse_xml(describe_eo_coverage_set_xml)
remote_file_list = []
# compare remote and local files and determine files to be downloaded
local_file_list = get_local_filelist(local_dir)
register_list = compare_files(local_file_list,remote_file_list)
print (str(len(local_file_list)) + " on local file list")
print (str(len(remote_file_list)) + " on remote list")
print (str(len(register_list)) + " on register list")
register_files(register_list,local_dir,collection)
print ("done")
except urllib2.URLError, e:
print ("ERROR: " + str(e))
if ingest and len(diff_list) > 0:
print "cleaning up and restarting... "
#clean_up_petascope()
restart_rasdaman()
# get local filelist
def get_local_filelist(dir):
filelist = []
for file in os.listdir(dir):
if fnmatch.fnmatch(file, '*.tif'):
filelist.append(file)
return filelist
# make a DescribeEOCoverageSet request and return the response XML
def describe_eo_coverage_set(describe_eo_coverage_set_url,time_offset):
end_time = date.today()
#start_time = end_time - timedelta(days=TIMEOFFSET)
start_time = end_time - timedelta(days=time_offset)
describe_eo_coverage_set_url = describe_eo_coverage_set_url + '&subset=phenomenonTime("' + str(start_time) + '","' + str(end_time) + '")'
describe_eo_coverage_set_xml = urllib2.urlopen(describe_eo_coverage_set_url)
return describe_eo_coverage_set_xml
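# With the remote endpoint from the config and a 7 day offset, the request
# built above takes roughly this form (dates are illustrative):
#   http://neso.cryoland.enveo.at/cryoland/ows?service=wcs&version=2.0.0&request=describeeocoverageset&eoid=daily_FSC_Baltic_Optical&subset=phenomenonTime("2013-04-01","2013-04-08")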
# parse DescribeEOCoverageSet XML and return list of remote coverages
def parse_xml(describe_eo_coverage_set_xml):
remote_file_list = []
xml = minidom.parse(describe_eo_coverage_set_xml)
itemlist = xml.getElementsByTagName('wcs:CoverageDescription')
for i in itemlist:
remote_file_list.append(i.attributes['gml:id'].value)
return remote_file_list
# returns list of coverages available remotely but not locally
def compare_files(remote_file_list,local_file_list):
diff_list = []
for i in remote_file_list:
if i not in local_file_list:
diff_list.append(i)
return diff_list
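# For illustration:
#   compare_files(["a.tif", "b.tif"], ["a.tif"])  # -> ["b.tif"]
# Note that main() also calls this helper with the arguments swapped
# (local list first, remote list second) to build the register list.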
# download files
def download_files(remotewcsurl,diff_list,local_dir):
for i in diff_list:
get_coverage_url = remotewcsurl + "request=getcoverage&coverageid=" + i
download_file = local_dir + i
print ("downloading " + i + "...")
try:
open(download_file, 'wb').write(urllib2.urlopen(get_coverage_url).read())
print ("download successful")
except urllib2.URLError, e:
print ("ERROR: " + e)
# retrieve id(s) of object(s) in given rasdaman collection
def get_oid(col):
p = subprocess.Popen('rasql -q "select (long)oid(c) from %s as c" --out string' % col , stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
p.wait()
output = p.stdout.read()
p.stdout.close()
oid = []
for line in output.split('\n'):
if 'Result element' in line:
oid.append(line.split(':')[1].strip())
return oid
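# get_oid() is not called from main(); it is a helper for inspecting a
# collection manually. It relies on rasql reporting one
# "Result element <n>: <oid>" line per object in the collection, which is
# what the split(':') above extracts.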
# import new files in rasdaman collection
def import_files(dir,collection,datatype,crs1,crs2,xres,yres,storagedir,remotewcsurl,tiling):
# pickle exports two lists as textfiles during the import procedure for future runs of the script:
rasdaman_success_location = storagedir + "rasdaman_" + collection + "_success"
rasdaman_error_location = storagedir + "rasdaman_" + collection + "_error"
# open list of successfully imported coverages from previous run or create if not available.
rasdaman_success = import_list(rasdaman_success_location)
# open list of coverages not successfully imported or create if not available.
rasdaman_error = import_list(rasdaman_error_location)
# merge list of new coverages with list of old coverages not yet imported
local_file_list = get_local_filelist(dir)
diff_list = compare_files(local_file_list,rasdaman_success)
ingest_list = diff_list + rasdaman_error
print (str(len(ingest_list)) + " coverages to be ingested")
# reset rasdaman_error list
rasdaman_error = []
rasdaman_error_file = open(rasdaman_error_location,'w+')
pickle.dump(rasdaman_error, rasdaman_error_file)
# sort ingest_list
ingest_list = sort_list(ingest_list,dir,remotewcsurl)
# ingest coverages
for i in ingest_list:
# get timestamp
unixtime = get_timestamp(i,remotewcsurl,dir)
ingestfile = dir+i
temp_ingestfile = ""
if (xres>0) and (yres>0):
datafile = gdal.Open(dir+i)
cols = datafile.RasterXSize
rows = datafile.RasterYSize
print "%s, %s" %(xres, yres)
print "%s, %s" %(cols, rows)
if (cols != xres) or (rows != yres):
print "resampling ..."
temp_ingestfile = tempfile.mktemp()
resize = "gdal_translate -outsize %s %s %s %s" %(xres, yres, ingestfile, temp_ingestfile)
p = subprocess.Popen(resize, stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
p.wait()
output = p.stdout.read()
print output
ingestfile = temp_ingestfile
else:
print "no resampling"
try:
# TODO: more detailed error handling
# (1) error handling on OID read error --> solution restart rasdaman
# (2)
# ERROR - RasdamanHelper2::updateImage, l. 678: image update failed: RasManager Error: Write transaction in progress, please retry again later.
# ERROR - rimport::main, l. 1364: RasManager Error: Write transaction in progress, please retry again later.
# TODO: check on import via rasql?
# $ rasql -q "SELECT 1 FROM <collection> AS c WHERE some_cells(c[*:*,*:*,<t_i>] != 0)" --out hex | grep "Query result" | awk 'BEGIN {FS=" "}; { print $5 }' | { read out; [[ $out -eq "1" ]] && echo TRUE || echo FALSE; }
# TODO: backup solution for rasdaman_success lists?
# TODO: reset ps_domain table
print unixtime
rasimport = 'rasimport -f %s --coll %s --coverage-name %s -t %s --crs-uri "%s":"%s" --crs-order 1:0:2 --csz 1 --z-coords %s --tiling "%s"' % (ingestfile, collection, collection, datatype, crs1, crs2, unixtime, tiling)
print ("Import " + i + " to " + collection)
print rasimport
# start rasimport
p = subprocess.Popen(rasimport, stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
p.wait()
output = p.stdout.read()
print output
if output:
print "rasdaman input error"
rasdaman_error.append(i)
export_list(rasdaman_error,rasdaman_error_location)
else:
print "import successful"
rasdaman_success.append(i)
export_list(rasdaman_success,rasdaman_success_location)
p.stdout.close()
except:
print "rasdaman input error"
rasdaman_error.append(i)
export_list(rasdaman_error,rasdaman_error_location)
print "\n"
if os.path.isfile(temp_ingestfile):
os.remove(temp_ingestfile)
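# For illustration, with the daily_FSC_Baltic_Optical entry from the config
# above, the rasimport call printed for one coverage looks roughly like this
# (coverage file name and timestamp are placeholders):
#
#   rasimport -f /home/ungarj/temp/getSnowMaps/CRYOLAND/daily_FSC_Baltic_Optical/<coverage_id>.tif \
#       --coll daily_FSC_Baltic_Optical --coverage-name daily_FSC_Baltic_Optical \
#       -t GreyCube:GreySet3 \
#       --crs-uri "http://kahlua.eecs.jacobs-university.de:8080/def/crs/EPSG/0/4326":"http://kahlua.eecs.jacobs-university.de:8080/def/crs/OGC/0/Temporal?epoch=%221970-01-01T00:00:00Z%22%26uom=%22s%22%26label=%22unix%22" \
#       --crs-order 1:0:2 --csz 1 --z-coords 1365379200 \
#       --tiling "tiling aligned [0:511,0:511,0:0] tile size 262144"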
# sort file list according to timestamp
def sort_list(ingest_list,dir,remotewcsurl):
time_dict = {}
for i in ingest_list:
time_dict[i] = get_timestamp(i,remotewcsurl,dir)
return sorted(ingest_list, key=lambda i: time_dict[i])
# get UNIX timestamp
def get_timestamp(i,remotewcsurl,dir):
describe_coverage_url = remotewcsurl + "request=describecoverage&coverageid=" + i
describe_coverage_xml = dir + i + ".xml"
try:
# use the locally cached DescribeCoverage response if it exists
open(describe_coverage_xml, 'r').close()
except IOError:
# otherwise fetch the metadata from the server and cache it locally
open(describe_coverage_xml, 'w').write(urllib2.urlopen(describe_coverage_url).read())
# determine coverage timestamp and convert to UNIX timestamp
xml = minidom.parse(describe_coverage_xml)
coverage_date = xml.getElementsByTagName('gml:timePosition').item(0).firstChild.wholeText
coverage_date = datetime.strptime(coverage_date, "%Y-%m-%dT%H:%M:%SZ")
unixtime = (coverage_date - unix_date_start).days*24*60*60
return unixtime
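# For example, a gml:timePosition of "2013-04-08T00:00:00Z" yields
# (datetime(2013, 4, 8) - datetime(1970, 1, 1)).days * 24 * 60 * 60 = 1365379200;
# the time of day is truncated to whole days by using .days.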
# import list from previous run or create if not available
def import_list(location):
try:
file = open(location,'r+')
list = pickle.load(file)
except (IOError, EOFError):
list = []
file = open(location,'w+')
pickle.dump(list,file)
return list
# export list for next run
def export_list(list,location):
file = open(location,'w+')
pickle.dump(list, file)
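# Round trip of the two pickle helpers (path is illustrative):
#   export_list(["cov_a.tif", "cov_b.tif"], "/tmp/rasdaman_demo_success")
#   import_list("/tmp/rasdaman_demo_success")  # -> ["cov_a.tif", "cov_b.tif"]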
# rename the z-axis entries in petascopedb for all configured collections
def clean_up_petascope():
print ("cleaning up...")
for d in datasetseries:
collection = datasetseries[d]["collection"]
column = ["type='6'", "name='t'"]
for c in column:
sql = "psql -d petascopedb -c \"update ps_domain set %s where coverage=(select id from ps_coverage where name=\'%s\') and i=\'2\'\"" % (c,collection)
print sql
p = subprocess.Popen(sql, stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
p.wait()
output = p.stdout.read()
print output
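# For the daily_FSC_Baltic_Optical collection from the config above this
# issues, for example:
#   psql -d petascopedb -c "update ps_domain set type='6' where coverage=(select id from ps_coverage where name='daily_FSC_Baltic_Optical') and i='2'"
#   psql -d petascopedb -c "update ps_domain set name='t' where coverage=(select id from ps_coverage where name='daily_FSC_Baltic_Optical') and i='2'"
# Note that this function is currently commented out in main() and expects the
# parsed datasetseries config to be available at module level.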
def restart_rasdaman():
restart = "sudo /etc/init.d/rasdaman restart"
print restart
try:
p = subprocess.Popen(restart, stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
p.wait()
output = p.stdout.read()
print output
except:
print ("ERROR occured")
def register_files(register_list,dir,collection):
print (str(len(register_list)) + " coverages to be registered")
# TODO loop through diff:
for r in register_list:
print r
# TODO get timestamp & footprint
# TODO register to dataset series
if __name__ == "__main__":
main(sys.argv[1:])