Dockerfile for spaCy Prodigy in a cloud setup using a remote PostgreSQL database, including a custom instructions file and an overridden index.html - and the leanest I've got
# Base image: the Alpine variant of Python 3.6, chosen to keep the image as lean as possible.
FROM python:3.6-alpine

# openssl is installed ahead of the HTTPS download below
# (presumably required by wget for TLS on this base image - confirm before removing).
RUN apk add --no-cache openssl

# Release of dockerize to install; dockerize is the tool the CMD uses to render
# prodigy.json from its template at container start.
ENV DOCKERIZE_VERSION=v0.6.1

# Download the release tarball, unpack the binary into /usr/local/bin and delete the
# tarball in the same layer so the archive never ends up in the image.
# NOTE(review): the download is not checksum-verified.
RUN wget "https://github.com/jwilder/dockerize/releases/download/${DOCKERIZE_VERSION}/dockerize-alpine-linux-amd64-${DOCKERIZE_VERSION}.tar.gz" \
    && tar -C /usr/local/bin -xzvf "dockerize-alpine-linux-amd64-${DOCKERIZE_VERSION}.tar.gz" \
    && rm "dockerize-alpine-linux-amd64-${DOCKERIZE_VERSION}.tar.gz"
# Python deps for alpine.
# postgresql-libs is installed as a regular (permanent) package; the compiler
# toolchain is installed, used and removed inside the single RUN further down.
RUN apk add --no-cache postgresql-libs

RUN mkdir -pv /prodigy /prodigy/src
WORKDIR /prodigy

# The prodigy wheel file is something you get when you buy prodigy, it's not a free package.
# The destination ends with "/" so the COPY also works when the glob matches several wheels.
COPY ./*.whl /prodigy/
COPY requirements.txt /prodigy/

# One layer for: install build toolchain -> pip install -> prune -> pin runtime libs -> drop toolchain.
# Keeping this in a single RUN matters: deleting .build-deps in a later layer than the one
# that installed it would not shrink the image.
RUN apk add --no-cache --virtual .build-deps \
        g++ \
        gcc \
        musl-dev \
        postgresql-dev \
    && pip install --no-cache-dir -r requirements.txt \
    # Remove test directories and compiled bytecode under /usr/local.
    # The parentheses group the -name alternatives so that -exec applies to BOTH the
    # directory matches and the file matches; -depth visits a directory's contents
    # before the directory itself.
    && find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests \) \) \
            -o \
            \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
        \) -exec rm -rf '{}' + \
    # Collect the shared libraries needed by binaries under /usr/local, turn them into
    # "so:<name>" package queries and keep those that resolve to installed packages.
    && runDeps="$( \
        scanelf --needed --nobanner --recursive /usr/local \
            | awk '{ gsub(/,/, "\nso:", $2); print "so:" $2 }' \
            | sort -u \
            | xargs -r apk info --installed \
            | sort -u \
    )" \
    # $runDeps is intentionally unquoted: it must split into one argument per package.
    # Registering them under .rundeps keeps them installed when .build-deps is deleted.
    && apk add --virtual .rundeps $runDeps \
    && apk del .build-deps
# Directory exported to the container environment as PRODIGY_HOME; the rendered
# prodigy.json is written into this same directory by the CMD below.
ENV PRODIGY_HOME=/prodigy

# The template that dockerize uses to create the actual prodigy.json config based on environment variables.
COPY ./prodigy.json.tpl /prodigy/prodigy.json.tpl

# Preparing an instructions file.
COPY ./instructions.txt /prodigy/instructions.txt

# Launch script(s) and application sources.
COPY *.sh /prodigy/
COPY src/* /prodigy/src/

# Comment out the next two instructions (COPY + RUN) if you don't override the index.html file.
COPY static/index.html index.html
# Locate the installed prodigy package directory and overwrite its static/index.html.
# The steps are chained with && so that a failed import aborts the build instead of
# running cp with an empty path prefix.
RUN PRODIGY_FILES="$(python -c 'import os, prodigy; print(os.path.dirname(prodigy.__file__))')" \
    && cp index.html "${PRODIGY_FILES}/static/"

EXPOSE 8080

# NOTE(review): no USER instruction is set, so the container runs as the base image's default user.
# The actual entry point: render the config template, then run the launch script.
CMD ["dockerize", "-template", "/prodigy/prodigy.json.tpl:/prodigy/prodigy.json", "./launch.sh"]
{
  "batch_size": 5,
  "host": "0.0.0.0",
  "instructions": "/prodigy/instructions.txt",
  "hide_meta": true,
  "choice_auto_accept": true,
  "db": "postgresql",
  "db_settings": {
    "postgresql": {
      "host": "{{ .Env.DB_HOST }}",
      "dbname": "{{ .Env.DATABASE_NAME }}",
      "port": 5432,
      "user": "{{ .Env.DB_USERNAME }}",
      "password": "{{ .Env.DB_PASSWORD }}"
    }
  }
}
This comment has been minimized.
This comment has been minimized.
Cool |
This comment has been minimized.
This comment has been minimized.
Cool
|
This comment has been minimized.
This comment has been minimized.
@walterg2 What did you add to your launch.sh? I'm new to the subject and kind of clueless :)
This comment has been minimized.
This comment has been minimized.
@lukleu The launch.sh script looks roughly like this:
#!/usr/bin/env bash
prodigy dataset $(your dataset) $(string name of dataset)
prodigy textcat.........
For my needs, I needed a Text Categorization with a special recipe (to select multiple choices), but YMMV.
This comment has been minimized.
This comment has been minimized.
@walterg2 thank you Sir! i was suspecting as much but was really unsure |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
This is awesome, but I have a couple of questions:
- What goes in your requirements.txt file? I think I'm missing something which is causing the container to fail.
- What goes in your launch.sh file? Just wanting to verify that what I've added for my version is what I think this file is doing.