Created
September 29, 2020 02:03
-
-
Save ian-whitestone/d3b876e77743923b112d7d004d86480c to your computer and use it in GitHub Desktop.
Code snippets for single node Dask cluster on GCP blog post
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a single Compute Engine VM that runs the Dask container image.
# MEMORY_PER_WORKER and THREADS_PER_WORKER are passed into the container as
# environment variables, where the image's entrypoint script reads them.
# Each continuation backslash must be the last character on its line.
gcloud compute instances create-with-container dask-cluster-instance \
  --zone=us-central1-a \
  --machine-type=e2-highcpu-16 \
  --tags=http-server,https-server \
  --container-env=MEMORY_PER_WORKER=1,THREADS_PER_WORKER=1 \
  --container-image=registry.hub.docker.com/ianwhitestone/domi-dask:latest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Entrypoint for a single-node Dask cluster. Resolves the cluster sizing from
# optional environment variables, starts one dask-scheduler plus N
# dask-workers in the background, then follows their shared log file so the
# script stays in the foreground.
#
# Optional environment variables:
#   NUM_WORKERS         number of worker processes (default: CPU count)
#   SCHEDULER_MEMORY    GB of memory held back for the scheduler (default: 2)
#   MEMORY_PER_WORKER   GB memory limit per worker
#                       (default: (total - scheduler memory) / number of workers)
#   THREADS_PER_WORKER  threads per worker process (default: 1)
set -x

readonly log_file=log.txt

if [[ -n "${NUM_WORKERS:-}" ]]; then
  echo "NUM_WORKERS environment variable found. Setting number of workers to $NUM_WORKERS."
  num_workers=$NUM_WORKERS
else
  num_cores=$(poetry run python -c "import multiprocessing; print(multiprocessing.cpu_count())") || {
    echo "Unable to determine the number of CPU cores" >&2
    exit 1
  }
  echo "NUM_WORKERS environment variable NOT found. Defaulting to $num_cores workers"
  num_workers=$num_cores
fi

if [[ -n "${SCHEDULER_MEMORY:-}" ]]; then
  echo "SCHEDULER_MEMORY environment variable found. Setting scheduler memory to $SCHEDULER_MEMORY GB"
  scheduler_memory=$SCHEDULER_MEMORY
else
  echo "SCHEDULER_MEMORY environment variable NOT found. Defaulting to 2GB"
  scheduler_memory=2
fi

if [[ -n "${MEMORY_PER_WORKER:-}" ]]; then
  echo "MEMORY_PER_WORKER environment variable found. Setting memory per worker to $MEMORY_PER_WORKER."
  memory_per_worker=$MEMORY_PER_WORKER
else
  total_memory=$(poetry run python -c "from distributed.system import memory_limit; print (memory_limit()/1e9)") || {
    echo "Unable to determine the total memory" >&2
    exit 1
  }
  # Divide by num_workers, not num_cores: num_cores is only assigned when
  # NUM_WORKERS is unset, so using it here produced an empty divisor whenever
  # NUM_WORKERS was provided without MEMORY_PER_WORKER.
  echo "MEMORY_PER_WORKER environment variable NOT found." \
    "Defaulting to ($total_memory - $scheduler_memory)/$num_workers GB per worker"
  available_worker_memory=$(echo "$total_memory - $scheduler_memory" | bc -l)
  memory_per_worker=$(echo "$available_worker_memory/$num_workers" | bc -l)
  echo "Memory per worker set to ${memory_per_worker}GB"
fi

if [[ -n "${THREADS_PER_WORKER:-}" ]]; then
  echo "THREADS_PER_WORKER environment variable found. Setting threads per worker to $THREADS_PER_WORKER."
  threads_per_worker=$THREADS_PER_WORKER
else
  echo "THREADS_PER_WORKER environment variable NOT found. Defaulting to 1 thread per worker"
  threads_per_worker=1
fi

# Start the dask scheduler & workers.
# The log is truncated once here and every process appends (>>) to it. With a
# plain '>' per process, each launch truncated the file again and the writers
# overwrote each other's output.
: > "$log_file"

echo "Starting dask-scheduler"
poetry run dask-scheduler >> "$log_file" 2>&1 &

echo "Creating $num_workers dask workers"
for ((i = 1; i <= num_workers; i++)); do
  poetry run dask-worker \
    --nthreads "$threads_per_worker" \
    --memory-limit "${memory_per_worker}GB" \
    127.0.0.1:8786 >> "$log_file" 2>&1 &
done

# Follow the shared log in the foreground so the script does not exit while
# the background scheduler and workers are running.
tail -f "$log_file"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image that runs a single-node Dask cluster: installs the project's poetry
# environment and uses dask-entrypoint.sh as the container entrypoint.
FROM python:3.7.6-buster

# Set the working directory (WORKDIR creates it when it does not exist)
WORKDIR /opt/app

# Linux updates & dependencies.
# update and install share one RUN so a cached, stale package index is never
# reused by a later install layer; the apt lists are removed to keep the layer
# small. bc is installed explicitly because dask-entrypoint.sh pipes its
# default memory-per-worker arithmetic through it.
RUN apt-get update -y \
    && apt-get install -y bc libpq-dev pandoc \
    && rm -rf /var/lib/apt/lists/*

# Install and configure poetry before copying any project files, so these
# layers stay cached when only the project changes.
RUN pip install poetry
RUN poetry config virtualenvs.in-project false
RUN poetry config virtualenvs.path ~/.virtualenvs

# Copy poetry files into docker image & build poetry environment
COPY pyproject.toml poetry.lock ./
RUN poetry install --no-root

# Copy dask-entrypoint.sh into docker image & make the script executable.
# Done last so editing the script does not invalidate the dependency layers.
COPY docker/dask-entrypoint.sh /usr/local/bin/dask-entrypoint.sh
RUN chmod +x /usr/local/bin/dask-entrypoint.sh

# dask-entrypoint.sh will start up the dask scheduler & workers
ENTRYPOINT ["/usr/local/bin/dask-entrypoint.sh"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment