Basic Dockerfile:
# Image that runs user-supplied R scripts from /usr/workdir inside a conda
# environment named "venv" (R 3.6.0 + r-essentials, Python 3.6 + pip).
# NOTE(review): the base image is untagged, so every build tracks whatever
# continuumio/miniconda currently publishes; pin a tag or digest once a
# known-good one has been confirmed.
FROM continuumio/miniconda

# Refresh the package index and install in ONE layer so a cached
# "apt-get update" layer can never be combined with a newer install list,
# then drop the index in the same layer to keep it small.
# The Debian package is "vim-tiny"; "vim.tiny" is only the name of the binary.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        software-properties-common \
        vim-tiny \
 && rm -rf /var/lib/apt/lists/*

# WORKDIR creates the directory when it does not exist, so no mkdir is needed.
WORKDIR /usr/workdir

# helper aliases
RUN echo 'alias ll="ls -ls"' >> ~/.bashrc \
 && echo 'alias vim="vim.tiny"' >> ~/.bashrc \
 && conda init bash

# create env and install R
RUN conda create -y --name venv -c conda-forge \
        r=3.6.0 \
        r-essentials \
 && conda clean --all --yes
# Python and pip are installed in a separate step because, unlike the R
# packages above, they are resolved without the conda-forge channel.
RUN conda install -y --name venv \
        pip \
        python=3.6 \
 && conda clean --all --yes

# activate the env
# Interactive bash shells activate "venv" through ~/.bashrc; the PATH entry
# makes the env's binaries win for non-interactive commands as well.
RUN echo "conda activate venv" >> ~/.bashrc
ENV PATH="/opt/conda/envs/venv/bin:${PATH}"

# ARG R_VER=3.6

# Sample inputs for the workflow: users_code.R is a small valid R loop, while
# users_code2.R holds text that is not valid R (presumably a deliberate
# failure case -- confirm).
RUN echo "for(i in 1:10) {print(i)}" > users_code.R
RUN echo "import abc" > users_code2.R

# Project files are copied last: they change most often, so every layer above
# stays cached when only the scripts are edited.
COPY run_workflow.sh .
RUN ["chmod", "+x", "run_workflow.sh"]
COPY execute_files.py .

# The entrypoint is currently disabled. If it is re-enabled, prefer the exec
# (JSON array) form so the process receives signals from "docker stop".
# ENTRYPOINT sh run_workflow.sh
How to build the image: docker build -t re3-image .
How to run: docker run -ti --rm re3-image
How to run in a terminal (without the entrypoint): docker run -ti --rm re3-image /bin/bash
How to run with arguments: docker run -e FILE_ID='3.6' -ti --rm re3-image
Workflow file (run_workflow.sh):
#!/bin/bash
# Container workflow wrapper: hands control to execute_files.py.
# file_id="$1" # get FILE_ID
# TODO: python3 pull_r_files.py "$file_id"
# "exec" replaces this shell with the Python process, so Python receives
# signals (e.g. from "docker stop") directly and its exit status becomes the
# exit status of this script.
exec python3 execute_files.py
Python file execute_files.py:
import glob
def execute_files(f, timeout=3600, rscript='/opt/conda/envs/venv/bin/Rscript'):
    """Run a single R script in a subprocess and summarise the outcome.

    Args:
        f: Path of the script, passed to the interpreter as its only argument.
        timeout: Maximum run time in seconds (default: one hour).
        rscript: Interpreter executable used to run the script (default: the
            Rscript binary of the "venv" conda environment).

    Returns:
        "success" when the process exits with status 0;
        "time limit exceeded" when it is killed after ``timeout`` seconds;
        otherwise a list of the "Error..." fragments found in the tail of
        stderr (the list is empty when no such fragment is present).

    Raises:
        OSError: if the interpreter cannot be started (e.g. it is missing).
    """
    import re
    from subprocess import PIPE, Popen, TimeoutExpired

    # creates new subprocess to execute R file as: "Rscript filename.R"
    p3 = Popen([rscript, f], stdout=PIPE, stderr=PIPE)
    try:
        _, stderr = p3.communicate(timeout=timeout)
    except TimeoutExpired:
        # Kill the runaway process, then drain its pipes so it is reaped.
        p3.kill()
        p3.communicate()
        return "time limit exceeded"

    if p3.returncode == 0:
        return "success"

    # Keep up to four lines preceding the final stderr line; the final line
    # itself is deliberately left out of the summary.
    tail = b' '.join(stderr.splitlines()[-5:-1])
    # errors="replace": stderr is not guaranteed to be valid UTF-8, and a
    # decoding failure must not hide the real error from the caller.
    tail_str = tail.decode("utf-8", errors="replace")
    return re.findall(r'(?:Error).*', tail_str)
def main():
    """Execute every R script in the current directory and report each outcome.

    Scripts are matched by the ``*.R`` and ``*.r`` patterns. For each script
    the file name and the value returned by ``execute_files`` are printed.

    Returns:
        A list of ``(filename, result)`` tuples, one per executed script, in
        the order the scripts were run.
    """
    # 1st step: collect all files
    # A set removes names matched by both patterns (possible where matching is
    # case-insensitive); sorting makes the execution order deterministic.
    list_of_r_files = sorted(set(glob.glob("*.R")) | set(glob.glob('*.r')))

    # 2nd step: executes files
    results = []
    for f in list_of_r_files:
        print(f)
        res = execute_files(f)
        print(res)
        results.append((f, res))

    # 3rd step: send results to the form
    # for a,b in results:
    # socketIO.print(a, b)
    return results


# Run the workflow only when this file is executed as a script.
if __name__ == "__main__":
    main()