# dacort/Dockerfile

## Dockerfile
### Base image: Spark on EMR 6.3.0, pulled from the us-west-2 ECR registry
FROM 895885662937.dkr.ecr.us-west-2.amazonaws.com/spark/emr-6.3.0:latest

### Switch to root for installation
USER root

### Install wget (used below to download the Miniconda installer) and drop
### the yum cache in the same layer so it is not baked into the image
RUN yum install -y wget \
    && yum clean all \
    && rm -rf /var/cache/yum

### Install Conda into a shared location; the installer script is removed in
### the same layer so it does not add to the image size
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh \
    && bash Miniconda3-py39_4.10.3-Linux-x86_64.sh -b -p /opt/miniconda3 \
    && rm -f Miniconda3-py39_4.10.3-Linux-x86_64.sh

### Prefix PATH with conda so it overrides system python
### Note that we still need to provide the Python path to the Spark job
ENV PATH="/opt/miniconda3/bin:$PATH"

### Install Iris and boto3 from conda-forge, then purge conda's package
### cache in the same layer to keep the image small
RUN conda install -y -c conda-forge \
        boto3 \
        iris \
        iris-sample-data \
    && conda clean --all --yes

### Optional: copy in a test script to verify the Iris installation
### (currently disabled; uncomment the next line to enable it)
### COPY test-iris.py /

### Switch back to the hadoop user
USER hadoop:hadoop
	### Base image: Spark on EMR 6.3.0, pulled from the us-west-2 ECR registry
	FROM 895885662937.dkr.ecr.us-west-2.amazonaws.com/spark/emr-6.3.0:latest

	### Switch to root for installation
	USER root

	### Install wget (used below to download the Miniconda installer) and drop
	### the yum cache in the same layer so it is not baked into the image
	RUN yum install -y wget \
	    && yum clean all \
	    && rm -rf /var/cache/yum

	### Install Conda into a shared location; the installer script is removed in
	### the same layer so it does not add to the image size
	RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh \
	    && bash Miniconda3-py39_4.10.3-Linux-x86_64.sh -b -p /opt/miniconda3 \
	    && rm -f Miniconda3-py39_4.10.3-Linux-x86_64.sh

	### Prefix PATH with conda so it overrides system python
	### Note that we still need to provide the Python path to the Spark job
	ENV PATH="/opt/miniconda3/bin:$PATH"

	### Install Iris and boto3 from conda-forge, then purge conda's package
	### cache in the same layer to keep the image small
	RUN conda install -y -c conda-forge \
	        boto3 \
	        iris \
	        iris-sample-data \
	    && conda clean --all --yes

	### Optional: copy in a test script to verify the Iris installation
	### (currently disabled; uncomment the next line to enable it)
	### COPY test-iris.py /

	### Switch back to the hadoop user
	USER hadoop:hadoop