dingp/Dockerfile

## build_image.sh
# Build the PyTorch image on top of the NGC release chosen below.
#   NVC_TAG      NGC PyTorch release; "-py3" is appended to form the base tag
#   NERSC_TAG    tag given to the resulting image
#   IMAGE_LABEL  repository namespace the image is tagged under
NVC_TAG=23.12
NERSC_TAG="ngc-${NVC_TAG}-v0"
IMAGE_LABEL="ncarrara"

docker build \
    --platform linux/amd64 \
    --build-arg "nvc_tag=${NVC_TAG}-py3" \
    --tag "${IMAGE_LABEL}/pytorch:${NERSC_TAG}" \
    --file Dockerfile \
    .

## Dockerfile
# PyTorch image with parallel HDF5/h5py, PyG and MinkowskiEngine, built on the
# NVIDIA NGC PyTorch container.
#
# Build argument:
#   nvc_tag  tag of nvcr.io/nvidia/pytorch to build from. The default equals
#            what build_image.sh passes (NVC_TAG=23.12 with "-py3" appended),
#            so building without --build-arg still resolves a valid base image.
ARG nvc_tag=23.12-py3
FROM nvcr.io/nvidia/pytorch:$nvc_tag

RUN python -m pip install --no-cache-dir -U pip

# Remove the apt package lists in the same layer that downloaded them so they
# are not stored in the image.
RUN apt-get update && \
    apt-get install -y rsync strace && \
    rm -rf /var/lib/apt/lists/*

# Build parallel HDF5 from source and install it under /opt/bin/hdf5.
# Cleanup steps back out of the source tree first: run from inside it,
# `rm -rf hdf5-hdf5-1_12_3` names a path that does not exist and silently
# succeeds, leaving the whole build tree in the layer. The downloaded tarball
# is removed as well.
RUN wget https://github.com/HDFGroup/hdf5/archive/refs/tags/hdf5-1_12_3.tar.gz && \
    tar -xvf hdf5-1_12_3.tar.gz && \
    cd hdf5-hdf5-1_12_3 && \
    ./configure --enable-parallel --enable-shared --prefix=/opt/bin/hdf5 && \
    make -j 8 && \
    make install && \
    cd .. && \
    rm -rf hdf5-hdf5-1_12_3 hdf5-1_12_3.tar.gz

# Build h5py from source against the HDF5 installed above. --no-binary stops
# pip from using a prebuilt wheel and --no-deps skips dependency installation;
# --no-cache-dir matches the other pip steps in this file.
RUN HDF5_MPI=ON CC=mpicc HDF5_DIR=/opt/bin/hdf5 \
    pip install --no-cache-dir --no-deps --no-binary=h5py h5py

# Additional Python packages. The extras specifiers are quoted so the shell
# cannot interpret the square brackets as a glob pattern.
RUN pip install --no-cache-dir \
    ruamel.yaml pyyaml cmake ipympl pillow wandb torchsummary pandas \
    jupyter-server-proxy requests tabulate ray "ray[tune]" "ray[rllib]" \
    scikit-image mpi4py deepspeed einops lightning \
    git+https://github.com/NERSC/nersc-tensorboard-helper.git

# Install PyG and its extension packages. The variables are set for this RUN
# only: FORCE_CUDA=1 requests CUDA builds, TORCH_CUDA_ARCH_LIST restricts them
# to compute capability 8.0, and CPATH exposes the CUDA headers.
RUN FORCE_CUDA=1 \
    TORCH_CUDA_ARCH_LIST="8.0" \
    CPATH=/usr/local/cuda/include \
    pip install --no-cache-dir \
    git+https://github.com/pyg-team/pyg-lib.git \
    torch_scatter torch_sparse torch_cluster torch-geometric

# Fetch MinkowskiEngine, apply the local patch that adds thrust includes, and
# build it. The checkout path is written out in full so the clone, the COPY
# destination and the build directory always agree, independent of the base
# image's working directory.
RUN git clone --recursive "https://github.com/NVIDIA/MinkowskiEngine" /workspace/MinkowskiEngine
COPY git.patch /workspace/MinkowskiEngine/git.patch
RUN cd /workspace/MinkowskiEngine && \
    git apply git.patch && \
    python setup.py install --force_cuda --blas=openblas

## git.patch
diff --git a/src/3rdparty/concurrent_unordered_map.cuh b/src/3rdparty/concurrent_unordered_map.cuh
index ed8e1b2..ff43515 100644
--- a/src/3rdparty/concurrent_unordered_map.cuh
+++ b/src/3rdparty/concurrent_unordered_map.cuh
@@ -27,6 +27,8 @@

 #include <thrust/pair.h>
 #include <thrust/count.h>
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>

 #include <functional>
 #include <memory>
diff --git a/src/coordinate_map_gpu.cu b/src/coordinate_map_gpu.cu
index fb7325d..f3fafac 100644
--- a/src/coordinate_map_gpu.cu
+++ b/src/coordinate_map_gpu.cu
@@ -36,6 +36,8 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/sort.h>
+#include <thrust/remove.h>
+#include <thrust/unique.h>

 namespace minkowski {

diff --git a/src/spmm.cu b/src/spmm.cu
index 8891a56..aa69000 100644
--- a/src/spmm.cu
+++ b/src/spmm.cu
@@ -35,5 +35,8 @@
 #include <c10/cuda/CUDACachingAllocator.h>
 #include <torch/extension.h>
 #include <torch/script.h>
+#include <thrust/sort.h>
+#include <thrust/remove.h>
+#include <thrust/unique.h>

 namespace minkowski {
	# Build the PyTorch image on top of the NGC release chosen below.
	#   NVC_TAG      NGC PyTorch release; "-py3" is appended to form the base tag
	#   NERSC_TAG    tag given to the resulting image
	#   IMAGE_LABEL  repository namespace the image is tagged under
	NVC_TAG=23.12
	NERSC_TAG="ngc-${NVC_TAG}-v0"
	IMAGE_LABEL="ncarrara"

	docker build \
	    --platform linux/amd64 \
	    --build-arg "nvc_tag=${NVC_TAG}-py3" \
	    --tag "${IMAGE_LABEL}/pytorch:${NERSC_TAG}" \
	    --file Dockerfile \
	    .
	# PyTorch image with parallel HDF5/h5py, PyG and MinkowskiEngine, built on the
	# NVIDIA NGC PyTorch container.
	#
	# Build argument:
	#   nvc_tag  tag of nvcr.io/nvidia/pytorch to build from. The default is the
	#            23.12 release with the "-py3" suffix, so building without
	#            --build-arg still resolves a valid base image.
	ARG nvc_tag=23.12-py3
	FROM nvcr.io/nvidia/pytorch:$nvc_tag

	RUN python -m pip install --no-cache-dir -U pip

	# Remove the apt package lists in the same layer that downloaded them so they
	# are not stored in the image.
	RUN apt-get update && \
	    apt-get install -y rsync strace && \
	    rm -rf /var/lib/apt/lists/*

	# Build parallel HDF5 from source and install it under /opt/bin/hdf5.
	# Cleanup steps back out of the source tree first: run from inside it,
	# `rm -rf hdf5-hdf5-1_12_3` names a path that does not exist and silently
	# succeeds, leaving the whole build tree in the layer. The downloaded tarball
	# is removed as well.
	RUN wget https://github.com/HDFGroup/hdf5/archive/refs/tags/hdf5-1_12_3.tar.gz && \
	    tar -xvf hdf5-1_12_3.tar.gz && \
	    cd hdf5-hdf5-1_12_3 && \
	    ./configure --enable-parallel --enable-shared --prefix=/opt/bin/hdf5 && \
	    make -j 8 && \
	    make install && \
	    cd .. && \
	    rm -rf hdf5-hdf5-1_12_3 hdf5-1_12_3.tar.gz

	# Build h5py from source against the HDF5 installed above. --no-binary stops
	# pip from using a prebuilt wheel and --no-deps skips dependency installation;
	# --no-cache-dir matches the other pip steps in this file.
	RUN HDF5_MPI=ON CC=mpicc HDF5_DIR=/opt/bin/hdf5 \
	    pip install --no-cache-dir --no-deps --no-binary=h5py h5py

	# Additional Python packages. The extras specifiers are quoted so the shell
	# cannot interpret the square brackets as a glob pattern.
	RUN pip install --no-cache-dir \
	    ruamel.yaml pyyaml cmake ipympl pillow wandb torchsummary pandas \
	    jupyter-server-proxy requests tabulate ray "ray[tune]" "ray[rllib]" \
	    scikit-image mpi4py deepspeed einops lightning \
	    git+https://github.com/NERSC/nersc-tensorboard-helper.git

	# Install PyG and its extension packages. The variables are set for this RUN
	# only: FORCE_CUDA=1 requests CUDA builds, TORCH_CUDA_ARCH_LIST restricts them
	# to compute capability 8.0, and CPATH exposes the CUDA headers.
	RUN FORCE_CUDA=1 \
	    TORCH_CUDA_ARCH_LIST="8.0" \
	    CPATH=/usr/local/cuda/include \
	    pip install --no-cache-dir \
	    git+https://github.com/pyg-team/pyg-lib.git \
	    torch_scatter torch_sparse torch_cluster torch-geometric

	# Fetch MinkowskiEngine, apply the local patch that adds thrust includes, and
	# build it. The checkout path is written out in full so the clone, the COPY
	# destination and the build directory always agree, independent of the base
	# image's working directory.
	RUN git clone --recursive "https://github.com/NVIDIA/MinkowskiEngine" /workspace/MinkowskiEngine
	COPY git.patch /workspace/MinkowskiEngine/git.patch
	RUN cd /workspace/MinkowskiEngine && \
	    git apply git.patch && \
	    python setup.py install --force_cuda --blas=openblas
	diff --git a/src/3rdparty/concurrent_unordered_map.cuh b/src/3rdparty/concurrent_unordered_map.cuh
	index ed8e1b2..ff43515 100644
	--- a/src/3rdparty/concurrent_unordered_map.cuh
	+++ b/src/3rdparty/concurrent_unordered_map.cuh
	@@ -27,6 +27,8 @@

	#include <thrust/pair.h>
	#include <thrust/count.h>
	+#include <thrust/device_vector.h>
	+#include <thrust/execution_policy.h>

	#include <functional>
	#include <memory>
	diff --git a/src/coordinate_map_gpu.cu b/src/coordinate_map_gpu.cu
	index fb7325d..f3fafac 100644
	--- a/src/coordinate_map_gpu.cu
	+++ b/src/coordinate_map_gpu.cu
	@@ -36,6 +36,8 @@
	#include <thrust/iterator/counting_iterator.h>
	#include <thrust/iterator/transform_iterator.h>
	#include <thrust/sort.h>
	+#include <thrust/remove.h>
	+#include <thrust/unique.h>

	namespace minkowski {

	diff --git a/src/spmm.cu b/src/spmm.cu
	index 8891a56..aa69000 100644
	--- a/src/spmm.cu
	+++ b/src/spmm.cu
	@@ -35,5 +35,8 @@
	#include <c10/cuda/CUDACachingAllocator.h>
	#include <torch/extension.h>
	#include <torch/script.h>
	+#include <thrust/sort.h>
	+#include <thrust/remove.h>
	+#include <thrust/unique.h>

	namespace minkowski {