TerraRef BD Hyperspectral Extractor notes
# =============================================================================
#
# In order for this extractor to run according to your preferences,
# the following parameters need to be set.
#
# Some parameters can be left with the default values provided here - in that
# case it is important to verify that the default value is appropriate to
# your system. It is especially important to verify that paths to files and
# software applications are valid in your system.
#
# =============================================================================
import os
# name to show in rabbitmq queue list
extractorName = os.getenv('RABBITMQ_QUEUE', "terra.hyperspectral")
# URL to be used for connecting to rabbitmq
rabbitmqURL = os.getenv('RABBITMQ_URI', "amqp://guest:guest@localhost/%2f")
# name of rabbitmq exchange
rabbitmqExchange = os.getenv('RABBITMQ_EXCHANGE', "clowder")
# type of files to process
messageType = "*.dataset.file.added"
# whether to verify SSL certificates; set to False for self-signed certificates
sslVerify = os.getenv('RABBITMQ_SSLVERIFY', False)
# Location of terraref.sh
workerScript = os.getenv('WORKER_SCRIPT', "terraref.sh")
# Workspace for input/output files.
inputDirectory = os.getenv('INPUTDIR', "./input")
outputDirectory = os.getenv('OUTPUTDIR', "./output")
# The extractor will only run when all these files are present.
# These are just filename postfixes for file matching.
# A few other things depend on the `_raw` file.
requiredInputFiles = [
'_raw',
'_raw.hdr',
'_image.jpg',
'_metadata.json',
'_frameIndex.txt',
'_settings.txt'
]
# Dockerfile for the TerraRef hyperspectral image conversion extractor
# August 17, 2016
FROM ubuntu:14.04
MAINTAINER Yan Y. Liu <yanliu@illinois.edu>
# install common libraries and python modules
USER root
RUN apt-get update
RUN apt-get upgrade -y -q
RUN apt-get install -y -q build-essential m4 swig antlr libantlr-dev udunits-bin libudunits2-dev unzip cmake wget git libjpeg-dev libpng-dev libtiff-dev
RUN apt-get install -y -q python-dev python-numpy python-pip python-virtualenv
# set up dirs for user installed software
RUN useradd -m -s /bin/bash ubuntu
RUN mkdir /srv/downloads && chown -R ubuntu: /srv/downloads && \
mkdir /srv/sw && chown -R ubuntu: /srv/sw
USER ubuntu
# set env vars for common libraries and python paths
ENV PYTHONPATH="/usr/lib/python2.7/dist-packages:${PYTHONPATH}"
## install from source
# hdf5
RUN cd /srv/downloads && \
wget -q https://www.hdfgroup.org/ftp/HDF5/releases/hdf5-1.8.17/src/hdf5-1.8.17.tar.gz && \
tar xfz hdf5-1.8.17.tar.gz && \
cd hdf5-1.8.17 && \
./configure --prefix=/srv/sw/hdf5-1.8.17 && \
make && make install
ENV PATH="/srv/sw/hdf5-1.8.17/bin:${PATH}" \
LD_LIBRARY_PATH="/srv/sw/hdf5-1.8.17/lib:${LD_LIBRARY_PATH}"
# netcdf4
RUN cd /srv/downloads && \
wget -q ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-4.4.1.tar.gz && \
tar xfz netcdf-4.4.1.tar.gz && \
cd netcdf-4.4.1 && \
CFLAGS="-I/srv/sw/hdf5-1.8.17/include " LDFLAGS=" -L/srv/sw/hdf5-1.8.17/lib " LIBS=" -lhdf5 -lhdf5_hl " ./configure --prefix=/srv/sw/netcdf-4.4.1 --enable-netcdf4 && \
make && make install
ENV PATH="/srv/sw/netcdf-4.4.1/bin:${PATH}" \
LD_LIBRARY_PATH="/srv/sw/netcdf-4.4.1/lib:${LD_LIBRARY_PATH}"
# geos
RUN cd /srv/downloads && \
wget -q http://download.osgeo.org/geos/geos-3.5.0.tar.bz2 && \
tar xfj geos-3.5.0.tar.bz2 && \
cd geos-3.5.0 && \
./configure --prefix=/srv/sw/geos --enable-python && \
make && make install
ENV PATH="/srv/sw/geos/bin:${PATH}" \
PYTHONPATH="/srv/sw/geos/lib/python2.7/site-packages:${PYTHONPATH}" \
LD_LIBRARY_PATH="/srv/sw/geos/lib:${LD_LIBRARY_PATH}"
# proj4
RUN cd /srv/downloads && \
wget -q https://github.com/OSGeo/proj.4/archive/4.9.2.tar.gz -O proj.4-4.9.2.tar.gz && \
tar xfz proj.4-4.9.2.tar.gz && \
cd proj.4-4.9.2 && \
./configure --prefix=/srv/sw/proj4 && \
make && make install
ENV PATH="/srv/sw/proj4/bin:${PATH}" \
LD_LIBRARY_PATH="/srv/sw/proj4/lib:${LD_LIBRARY_PATH}"
# gdal
RUN cd /srv/downloads && \
wget -q http://download.osgeo.org/gdal/2.1.1/gdal-2.1.1.tar.gz && \
tar xfz gdal-2.1.1.tar.gz && \
cd gdal-2.1.1 && \
./configure --with-libtiff=internal --with-geotiff=internal --with-png=internal --with-jpeg=internal --with-gif=internal --without-curl --with-python --with-hdf5=/srv/sw/hdf5-1.8.17 --with-netcdf=/srv/sw/netcdf-4.4.1 --with-geos=/srv/sw/geos/bin/geos-config --with-threads --prefix=/srv/sw/gdal && \
make && make install
ENV PATH="/srv/sw/gdal/bin:${PATH}" \
PYTHONPATH="/srv/sw/gdal/lib/python2.7/site-packages:${PYTHONPATH}" \
LD_LIBRARY_PATH="/srv/sw/gdal/lib:${LD_LIBRARY_PATH}"
# nco
RUN cd /srv/downloads && \
wget -q https://github.com/nco/nco/archive/4.6.1.tar.gz -O nco-4.6.1.tar.gz && \
tar xfz nco-4.6.1.tar.gz && \
cd nco-4.6.1 && \
./configure NETCDF_ROOT=/srv/sw/netcdf-4.4.1 --prefix=/srv/sw/nco-4.6.1 --enable-ncap2 --enable-udunits2 && \
make && make install
ENV PATH="/srv/sw/nco-4.6.1/bin:${PATH}" \
LD_LIBRARY_PATH="/srv/sw/nco-4.6.1/lib:${LD_LIBRARY_PATH}"
ENV USERHOME="/home/ubuntu"
WORKDIR "${USERHOME}"
## install pyclowder
# install python modules
RUN cd ${USERHOME} && \
virtualenv pyenv && \
. pyenv/bin/activate && \
pip install pika && \
CC=gcc CXX=g++ USE_SETUPCFG=0 HDF5_INCDIR=/srv/sw/hdf5-1.8.17/include HDF5_LIBDIR=/srv/sw/hdf5-1.8.17/lib NETCDF4_INCDIR=/srv/sw/netcdf-4.4.1/include NETCDF4_LIBDIR=/srv/sw/netcdf-4.4.1/lib pip install netCDF4 && \
pip install git+https://opensource.ncsa.illinois.edu/stash/scm/cats/pyclowder.git@bugfix/CATS-554-add-pyclowder-support-for-dataset && \
deactivate
## install hyperspectral image converter script
ENV PIPELINEDIR="${USERHOME}/computing-pipeline"
RUN git clone https://github.com/terraref/computing-pipeline.git "${PIPELINEDIR}"
## create workspace directories
ENV INPUTDIR="${USERHOME}/input" \
OUTPUTDIR="${USERHOME}/output"
RUN mkdir -p "${INPUTDIR}" && \
mkdir -p "${OUTPUTDIR}" && \
mkdir -p "${USERHOME}/logs" \
mkdir -p "${USERHOME}/test-data"
## download test input data
RUN wget -q http://141.142.168.44/nfiedata/yanliu/terraref-hyperspectral-input-sample.tgz && \
tar -xf terraref-hyperspectral-input-sample.tgz -C "./test-data" --strip-components 1
## install extractor
ENV RABBITMQ_URI="" \
RABBITMQ_EXCHANGE="clowder" \
RABBITMQ_VHOST="%2F" \
RABBITMQ_QUEUE="terra.hyperspectral" \
WORKER_SCRIPT="${PIPELINEDIR}/scripts/hyperspectral/terraref.sh"
COPY entrypoint.sh extractor_info.json config.py terra.hyperspectral.py ./
ENTRYPOINT ["./entrypoint.sh"]
CMD ["python", "./terra.hyperspectral.py"]
#!/bin/bash
set -e
# If RabbitMQ URI is not set, use the default credentials; while doing so,
# handle the linking scenario, where RABBITMQ_PORT_5672 is set.
if [ "$RABBITMQ_URI" == "" ]; then
if [ -n $RABBITMQ_PORT_5672 ]; then
RABBITMQ_URI="amqp://guest:guest@${RABBITMQ_PORT_5672_TCP_ADDR}:${RABBITMQ_PORT_5672_TCP_PORT}/%2F"
else
RABBITMQ_URI="amqp://guest:guest@localhost:5672/%2F"
fi
fi
. pyenv/bin/activate
printf "exec %s \n\n" "$@"
exec "$@"
#!/usr/bin/env python
import os
import subprocess
import logging
from config import *
import pyclowder.extractors as extractors
def main():
    global extractorName, messageType, rabbitmqExchange, rabbitmqURL
    # Set logging
    logging.basicConfig(format='%(levelname)-7s : %(name)s - %(message)s', level=logging.WARN)
    logging.getLogger('pyclowder.extractors').setLevel(logging.INFO)
    # Connect to rabbitmq
    extractors.connect_message_bus(
        extractorName = extractorName,
        messageType = messageType,
        rabbitmqExchange = rabbitmqExchange,
        rabbitmqURL = rabbitmqURL,
        processFileFunction = process_dataset,
        checkMessageFunction = check_message
    )
def check_message(parameters):
    # Check for expected input files before beginning processing
    if has_all_files(parameters):
        if has_output_file(parameters):
            print 'skipping, output file already exists'
            return False
        else:
            # Handle the message but do not download any files automatically.
            return "bypass"
    else:
        print 'skipping, not all input files are ready'
        return False
# ----------------------------------------------------------------------
# Process the dataset message and upload the results
def process_dataset(parameters):
    global extractorName, workerScript, inputDirectory, outputDirectory
    # Find input files in dataset
    files = get_all_files(parameters)
    # Download files to input directory
    for fileExt in files:
        files[fileExt]['path'] = extractors.download_file(
            channel = parameters['channel'],
            header = parameters['header'],
            host = parameters['host'],
            key = parameters['secretKey'],
            fileid = files[fileExt]['id'],
            # What's this argument for?
            intermediatefileid = files[fileExt]['id'],
            ext = fileExt
        )
        # Restore temp filenames to the originals - the worker script requires specific
        # name formatting, so the temp names are not suitable.
        files[fileExt]['old_path'] = files[fileExt]['path']
        files[fileExt]['path'] = os.path.join(inputDirectory, files[fileExt]['filename'])
        os.rename(files[fileExt]['old_path'], files[fileExt]['path'])
        print 'found %s file: %s' % (fileExt, files[fileExt]['path'])
    # Invoke terraref.sh
    outFilePath = os.path.join(outputDirectory, get_output_filename(files['_raw']['filename']))
    print 'invoking terraref.sh to create: %s' % outFilePath
    subprocess.call(["bash", workerScript, "-d", "1", "-I", inputDirectory, "-O", outputDirectory])
    print 'done creating output file'
    # Verify the output file exists and upload it to Clowder
    if os.path.exists(outFilePath):
        print 'uploading output file...'
        extractors.upload_file_to_dataset(filepath=outFilePath, parameters=parameters)
        print 'done uploading'
        print 'cleaning up...'
        # Clean up the input files.
        for fileExt in files:
            os.remove(files[fileExt]['path'])
        # Clean up the output file.
        os.remove(outFilePath)
        print 'done cleaning'
# ----------------------------------------------------------------------
# Find as many expected files as possible and return the set.
def get_all_files(parameters):
    files = {
        '_raw': None,
        '_raw.hdr': None,
        '_image.jpg': None,
        '_metadata.json': None,
        '_frameIndex.txt': None,
        '_settings.txt': None
    }
    if 'filelist' in parameters:
        for fileItem in parameters['filelist']:
            fileId = fileItem['id']
            fileName = fileItem['filename']
            for fileExt in files:
                if fileName[-len(fileExt):] == fileExt:
                    files[fileExt] = {
                        'id': fileId,
                        'filename': fileName
                    }
    return files
# ----------------------------------------------------------------------
# Returns the output filename.
def get_output_filename(raw_filename):
    return '%s.nc' % raw_filename[:-len('_raw')]
# ----------------------------------------------------------------------
# Returns true if all expected files are found.
def has_all_files(parameters):
    files = get_all_files(parameters)
    allFilesFound = True
    for fileExt in files:
        if files[fileExt] is None:
            allFilesFound = False
    return allFilesFound
# ----------------------------------------------------------------------
# Returns true if the output file is present.
def has_output_file(parameters):
    if 'filelist' not in parameters:
        return False
    if not has_all_files(parameters):
        return False
    files = get_all_files(parameters)
    outFilename = get_output_filename(files['_raw']['filename'])
    outFileFound = False
    for fileItem in parameters['filelist']:
        if outFilename == fileItem['filename']:
            outFileFound = True
            break
    return outFileFound
if __name__ == "__main__":
    main()
Zodiase commented Jul 15, 2016

Start up the extractor runtime environment

docker-compose up -d

Build extractor

docker build -t <image_name> .
docker build -t terra_hyperspectral .

Force rebuild

docker build --no-cache -t <image_name> .
docker build --no-cache -t terra_hyperspectral .

Run extractor

docker run --rm -i -t --name terra_hyperspectral_1 --link bdextractorstemplate_rabbitmq_1:rabbitmq terra_hyperspectral
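
The ENV defaults baked into the image (RABBITMQ_URI, RABBITMQ_EXCHANGE, RABBITMQ_QUEUE, WORKER_SCRIPT, INPUTDIR, OUTPUTDIR) feed the os.getenv lookups in config.py, so they can also be overridden per container with -e. For example (the broker URI here is a made-up value):

docker run --rm -i -t --name terra_hyperspectral_1 -e RABBITMQ_URI="amqp://user:pass@rabbit.example.org:5672/%2f" terra_hyperspectral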

Get a shell in the running extractor

docker exec -i -t terra_hyperspectral_1 /bin/bash

Test command

/home/ubuntu/computing-pipeline/scripts/hyperspectral/terraref.sh -d 1 -I /home/ubuntu/test-data -O /home/ubuntu/output

Other helper aliases

alias docker_clean_images='docker rmi $(docker images -a --filter=dangling=true -q)'
alias docker_clean_ps='docker rm $(docker ps --filter=status=exited --filter=status=created -q)'

Zodiase commented Jul 15, 2016

Sample Message from *.file.text.#

{
  "channel": <pika.adapters.blocking_connection.BlockingChannel object at 0x7f22eefa97d0>,
  "filename": "foo.txt",
  "secretKey": "r1ek3rs",
  "header": <BasicProperties(['content_type=application\\json', 'correlation_id=64217f3b-d6eb-440a-bcb6-79b7bd4bc64f', 'reply_to=amq.gen-zFP9E0oHDjP0fuzjCwhwlg'])>,
  "host": "http://10.211.55.9:9000",
  "flags": "",
  "fileSize": "9",
  "intermediateId": "5789532de4b0049e1326d4ab",
  "inputfile": "/tmp/tmpuoraoM.txt",
  "id": "5789532de4b0049e1326d4ab",
  "datasetId": "57868281e4b0049e260eb382",
  "fileid": "5789532de4b0049e1326d4ab"
}

Sample Message from *.dataset.file.added

{
  "files": [
    "/tmp/tmpLBmo5d/_info.json",
    "/tmp/tmpLBmo5d/_dataset_metadata.json",
    "/tmp/tmpLBmo5d/foo.txt_578d472be4b0049e1326efda/_info.json",
    "/tmp/tmpLBmo5d/foo.txt_578d472be4b0049e1326efda/foo.txt",
    "/tmp/tmpLBmo5d/foo.txt_578d472be4b0049e1326efda/_metadata.json"
  ],
  "channel": <pika.adapters.blocking_connection.BlockingChannel object at 0x7f011ea2f0d0>,
  "filelist": [
    {
      "filename": "foo.txt",
      "date-created": "Mon Jul 18 21:16:27 UTC 2016",
      "contentType": "text/plain",
      "id": "578d472be4b0049e1326efda",
      "size": "9"
    }
  ],
  "method": <Basic.Deliver(['consumer_tag=ctag1.5091d4c2a2534709b9c6d5865820fde3', 'delivery_tag=1', 'exchange=clowder', 'redelivered=False', 'routing_key=clowder.dataset.file.added'])>,
  "secretKey": "r1ek3rs",
  "header": <BasicProperties(['content_type=application\\json', 'correlation_id=efaa53d0-1e4a-4dfc-a947-6a420b1ee079', 'reply_to=amq.gen-zFP9E0oHDjP0fuzjCwhwlg'])>,
  "host": "http://10.211.55.9:9000",
  "flags": "",
  "fileSize": "9",
  "intermediateId": "578d472be4b0049e1326efda",
  "datasetInfo": {
    "description": "",
    "created": "Wed Jul 13 18:03:45 UTC 2016",
    "id": "57868281e4b0049e260eb382",
    "authorId": "57866660292acbb6539f5e85",
    "thumbnail": "None",
    "name": "Hello World"
  },
  "filename": "foo.txt",
  "id": "578d472be4b0049e1326efda",
  "datasetId": "57868281e4b0049e260eb382",
  "fileid": "578d472be4b0049e1326efda"
}
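
For reference, a standalone sketch (my own illustration, not code from the extractor) of how get_all_files/has_all_files treat this sample message: the postfix test below mirrors fileName[-len(fileExt):] == fileExt from the script, and since only foo.txt appears in filelist, none of the six required postfixes match, so check_message skips the message.

# Illustration only: apply the extractor's postfix matching to the sample filelist above.
required = ['_raw', '_raw.hdr', '_image.jpg', '_metadata.json', '_frameIndex.txt', '_settings.txt']
filelist = [{"filename": "foo.txt", "id": "578d472be4b0049e1326efda"}]

found = dict((ext, None) for ext in required)
for item in filelist:
    for ext in required:
        if item['filename'].endswith(ext):  # equivalent to filename[-len(ext):] == ext
            found[ext] = item

print all(found[ext] is not None for ext in required)  # False, so the message is skipped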

Zodiase commented Jul 18, 2016

After changing config.py:

  • Delete the existing queue so it is re-created with the new settings (a pika sketch follows).
  • Also update the Dockerfile accordingly.
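
A minimal sketch of deleting the queue with pika (my own helper, not part of the extractor; assumes the default guest/guest broker from the compose setup and the queue name from config.py):

# Hypothetical helper: drop the old queue so the extractor re-declares it with new settings.
import pika

connection = pika.BlockingConnection(pika.URLParameters("amqp://guest:guest@localhost:5672/%2f"))
channel = connection.channel()
channel.queue_delete(queue="terra.hyperspectral")  # RABBITMQ_QUEUE from config.py
connection.close()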

Zodiase commented Jul 18, 2016

Install the PyClowder branch that supports dataset operations:

git clone https://opensource.ncsa.illinois.edu/bitbucket/scm/cats/pyclowder.git
cd pyclowder
git checkout bugfix/CATS-554-add-pyclowder-support-for-dataset
python setup.py install
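
A quick sanity check that the branch is importable from the environment the extractor will use (assuming the install above targeted that environment):

python -c "import pyclowder.extractors"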

Zodiase commented Jul 27, 2016

Strange metadata

Uploaded with extractors.upload_dataset_metadata(mdata=metadata, parameters=parameters).

The metadata shows up as "Extracted by http://clowder.ncsa.illinois.edu/extractors/deprecatedapi on Jul 27, 2016".

(screenshot of the metadata entry as shown in Clowder)

Zodiase commented Aug 24, 2016

Apparently, when using ENV in the Dockerfile, $HOME is empty (hence the explicit USERHOME variable).

Zodiase commented Aug 25, 2016

Note: building the Dockerfile took about 18 minutes on my testing machine.

Zodiase commented Oct 6, 2016

Met Data

files:

[
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_29_2304.dat",
      "old_path":"/tmp/tmpFwy7iN.dat",
      "id":"57f6a5fae4b0c6c9a37b7fef",
      "filename":"WeatherStation_SecData_2016_08_29_2304.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0006.dat",
      "old_path":"/tmp/tmpxQDhnf.dat",
      "id":"57f6a5fce4b0c6c9a37b7ff8",
      "filename":"WeatherStation_SecData_2016_08_30_0006.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0108.dat",
      "old_path":"/tmp/tmpcU8zMw.dat",
      "id":"57f6a5fce4b0c6c9a37b7ffd",
      "filename":"WeatherStation_SecData_2016_08_30_0108.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0210.dat",
      "old_path":"/tmp/tmpGa0s2T.dat",
      "id":"57f6a5fce4b0c6c9a37b8002",
      "filename":"WeatherStation_SecData_2016_08_30_0210.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0618.dat",
      "old_path":"/tmp/tmpHqNAWF.dat",
      "id":"57f6a5fde4b0c6c9a37b8008",
      "filename":"WeatherStation_SecData_2016_08_30_0618.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0516.dat",
      "old_path":"/tmp/tmpZYwMBV.dat",
      "id":"57f6a5fde4b0c6c9a37b8011",
      "filename":"WeatherStation_SecData_2016_08_30_0516.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0720.dat",
      "old_path":"/tmp/tmpscd5hm.dat",
      "id":"57f6a5fde4b0c6c9a37b800d",
      "filename":"WeatherStation_SecData_2016_08_30_0720.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0414.dat",
      "old_path":"/tmp/tmp2HWOGQ.dat",
      "id":"57f6a5fde4b0c6c9a37b8017",
      "filename":"WeatherStation_SecData_2016_08_30_0414.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0312.dat",
      "old_path":"/tmp/tmpQ3Pxd8.dat",
      "id":"57f6a5fde4b0c6c9a37b8019",
      "filename":"WeatherStation_SecData_2016_08_30_0312.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1230.dat",
      "old_path":"/tmp/tmp9AHe6C.dat",
      "id":"57f6a5fde4b0c6c9a37b8025",
      "filename":"WeatherStation_SecData_2016_08_30_1230.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0924.dat",
      "old_path":"/tmp/tmpfeq0ZQ.dat",
      "id":"57f6a5fde4b0c6c9a37b8028",
      "filename":"WeatherStation_SecData_2016_08_30_0924.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1128.dat",
      "old_path":"/tmp/tmpT17Fee.dat",
      "id":"57f6a5fde4b0c6c9a37b8035",
      "filename":"WeatherStation_SecData_2016_08_30_1128.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_0822.dat",
      "old_path":"/tmp/tmpl99oma.dat",
      "id":"57f6a5fde4b0c6c9a37b803b",
      "filename":"WeatherStation_SecData_2016_08_30_0822.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1026.dat",
      "old_path":"/tmp/tmpPvX6Ih.dat",
      "id":"57f6a5fde4b0c6c9a37b8031",
      "filename":"WeatherStation_SecData_2016_08_30_1026.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1332.dat",
      "old_path":"/tmp/tmpFl7FFE.dat",
      "id":"57f6a5fde4b0c6c9a37b8044",
      "filename":"WeatherStation_SecData_2016_08_30_1332.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1740.dat",
      "old_path":"/tmp/tmpT40sOC.dat",
      "id":"57f6a5fee4b0c6c9a37b804e",
      "filename":"WeatherStation_SecData_2016_08_30_1740.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1842.dat",
      "old_path":"/tmp/tmpj3iu63.dat",
      "id":"57f6a5fee4b0c6c9a37b8051",
      "filename":"WeatherStation_SecData_2016_08_30_1842.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1434.dat",
      "old_path":"/tmp/tmpNdY2bB.dat",
      "id":"57f6a5fee4b0c6c9a37b804b",
      "filename":"WeatherStation_SecData_2016_08_30_1434.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1536.dat",
      "old_path":"/tmp/tmpuefrk3.dat",
      "id":"57f6a5fee4b0c6c9a37b8053",
      "filename":"WeatherStation_SecData_2016_08_30_1536.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1944.dat",
      "old_path":"/tmp/tmpM6AegX.dat",
      "id":"57f6a5fee4b0c6c9a37b805a",
      "filename":"WeatherStation_SecData_2016_08_30_1944.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_1638.dat",
      "old_path":"/tmp/tmp22i41X.dat",
      "id":"57f6a5fee4b0c6c9a37b8065",
      "filename":"WeatherStation_SecData_2016_08_30_1638.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_2046.dat",
      "old_path":"/tmp/tmpjhjmdO.dat",
      "id":"57f6a5fee4b0c6c9a37b806e",
      "filename":"WeatherStation_SecData_2016_08_30_2046.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_2148.dat",
      "old_path":"/tmp/tmpmcobZ9.dat",
      "id":"57f6a5fee4b0c6c9a37b8073",
      "filename":"WeatherStation_SecData_2016_08_30_2148.dat"
   },
   {
      "path":"/home/ubuntu/input/WeatherStation_SecData_2016_08_30_2250.dat",
      "old_path":"/tmp/tmpzVc1tt.dat",
      "id":"57f6a5fee4b0c6c9a37b8076",
      "filename":"WeatherStation_SecData_2016_08_30_2250.dat"
   }
]

Zodiase commented Oct 6, 2016

Race issue

For example, if the last two of the 24 required files are uploaded at the same time, the extractor is triggered twice, and each run sees all 24 files present. A possible mitigation is sketched below.
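
One possible mitigation (a sketch of a general technique, not something the current code does): take an exclusive lock file per output before invoking terraref.sh, so only one of the simultaneously triggered runs proceeds; the other can re-check has_output_file and bail out.

import errno
import os

def try_acquire_lock(lock_path):
    # O_CREAT | O_EXCL fails if the file already exists, so only one process wins the lock.
    try:
        fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        os.close(fd)
        return True
    except OSError as e:
        if e.errno == errno.EEXIST:
            return False
        raise

# Hypothetical use inside process_dataset, before calling terraref.sh:
#   if not try_acquire_lock(outFilePath + '.lock'):
#       print 'another run is already processing this dataset, skipping'
#       return
#   ... run terraref.sh, upload, clean up, then remove the lock file ...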

Zodiase commented Oct 18, 2016

  • Find the sensor ID and hardcode it.
  • Search for the stream by name (not sure whether it works): streams?stream_name=... (a request sketch follows).
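
A hedged sketch of the stream-by-name lookup (the /api/geostreams/streams path and the key query parameter are assumptions on my part; host and secretKey follow the fields seen in the sample messages above):

# Hypothetical lookup, untested against Clowder.
import requests

def find_stream_by_name(host, key, stream_name):
    r = requests.get(host + '/api/geostreams/streams',
                     params={'stream_name': stream_name, 'key': key})
    r.raise_for_status()
    return r.json()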
