@steren
Last active April 29, 2024 05:32
llamafile container image
FROM debian:latest
RUN apt-get update && apt-get install -y --no-install-recommends wget ca-certificates && rm -rf /var/lib/apt/lists/*
# Update this to the URL pointing at the llamafile you want to run.
# Find other models at https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#other-example-llamafiles
ENV LLAMAFILE_DOWNLOAD_URL="https://huggingface.co/jartine/Mistral-7B-Instruct-v0.2-llamafile/resolve/main/mistral-7b-instruct-v0.2.Q4_0.llamafile?download=true"
# Download the llamafile and make it executable
RUN wget "$LLAMAFILE_DOWNLOAD_URL" -O ./model.llamafile && chmod +x ./model.llamafile
# Use the llamafile executable as the container start command.
# Default arguments (overridable at runtime by appending arguments to `docker run`):
# use the NVIDIA GPU, offload as many layers as possible to it (-ngl 9999),
# listen on 0.0.0.0, and do not attempt to open a browser.
ENTRYPOINT ["./model.llamafile"]
CMD ["--gpu", "nvidia", "-ngl", "9999", "--host", "0.0.0.0", "--nobrowser"]
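Assuming a host with Docker and the NVIDIA Container Toolkit installed, the image can be built and run roughly as follows (the image name and host port are placeholders, and the llamafile server's default port of 8080 is assumed):

```shell
# Build the image; the model llamafile (several GB) is downloaded at build time.
docker build -t llamafile-server .

# Run with GPU access; the server listens on 0.0.0.0:8080 inside the container.
docker run --gpus all -p 8080:8080 llamafile-server

# Because the arguments live in CMD, they can be replaced at run time,
# e.g. to run CPU-only by offloading zero layers:
docker run -p 8080:8080 llamafile-server -ngl 0 --host 0.0.0.0 --nobrowser
```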