Created
July 14, 2023 23:00
-
-
Save ianscrivener/71bde7a2bfc92e8d217900229d78df51 to your computer and use it in GitHub Desktop.
setup NVidia GPU Docker for llama.cpp and run perplexity test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# BTW: we are running in a nvidia/cuda:11.x.x-devel-ubuntu22.04 container
# install some extra Ubuntu packages
# NOTE: the apt package is "aria2" (it provides the aria2c binary); "aria2c" is not a package name
apt install -y unzip libopenblas-dev nano git-lfs aria2 jq build-essential python3 python3-pip git
pip install --upgrade pip setuptools wheel
# clone llama.cpp repo
cd /workspace || exit 1
git clone https://github.com/ggerganov/llama.cpp.git

# setup & build llama.cpp with cuBLAS (NVidia GPU) support; -j builds in parallel
cd /workspace/llama.cpp || exit 1
pip install -r requirements.txt
make LLAMA_CUBLAS=1 -j
## get Open Llama 3B v1 (disabled — we download pre-quantized 7B models below instead)
# mkdir -p /workspace/3b_open_llama_v1
# wget -O /workspace/3b_open_llama_v1/pytorch_model.bin https://huggingface.co/openlm-research/open_llama_3b/resolve/main/pytorch_model.bin
# wget -O /workspace/3b_open_llama_v1/tokenizer.model https://huggingface.co/openlm-research/open_llama_3b/resolve/main/tokenizer.model
# ls /workspace/3b_open_llama_v1
#
## convert model to ggml F16 format & delete the original pytorch files to save disk
# cd /workspace/llama.cpp
# python3 convert.py /workspace/3b_open_llama_v1
# rm /workspace/3b_open_llama_v1/pytorch_model.bin
# rm /workspace/3b_open_llama_v1/tokenizer.model
# ls /workspace/3b_open_llama_v1
#
## quantize the F16 ggml model down to q4_0
# cd /workspace/llama.cpp
# ./quantize /workspace/3b_open_llama_v1/ggml-model-f16.bin /workspace/3b_open_llama_v1/ggml-model-q4_0.bin q4_0
# get quantized models — only q5_K is fetched; uncomment other quant levels as needed
mkdir -p /workspace/7b_open_llama_v1
cd /workspace/7b_open_llama_v1 || exit 1
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q2_K.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q3_K_S.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q3_K.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q3_K_L.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q4_0.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q4_1.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q4_K_S.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q4_K.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q5_0.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q5_K_S.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q5_1.bin
wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q5_K.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q6_K.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-q8_0.bin
# wget https://huggingface.co/SlyEcho/open_llama_7b_ggml/resolve/main/open-llama-7b-f16.bin
ls /workspace/7b_open_llama_v1
# get the WikiText-2 raw test corpus used by the perplexity tool
# NOTE(review): the research.metamind.io S3 bucket may no longer serve this file — verify,
# or fetch wikitext-2-raw-v1 from an alternative mirror if the download fails
cd /workspace || exit 1
wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
unzip wikitext-2-raw-v1.zip
rm wikitext-2-raw-v1.zip
cd /workspace/wikitext-2-raw || exit 1
# make smaller excerpts for quicker perplexity runs
head -n 406 wiki.test.raw > wiki.test.raw.406
head -n 103 wiki.test.raw > wiki.test.raw.103
# run the perplexity test against the downloaded model and corpus excerpt
cd /workspace/llama.cpp || exit 1
export model="/workspace/7b_open_llama_v1/open-llama-7b-q5_K.bin"
export corpus="/workspace/wikitext-2-raw/wiki.test.raw.103"
export context=512   # context size (-c)
export batch=512     # batch size (-b)
export threads=8     # CPU threads (-t)
export gpu=24        # layers to offload to the GPU (-ngl)
# quote every expansion (ShellCheck SC2086) so paths with spaces or globs don't break the command
./perplexity -m "$model" -f "$corpus" -c "$context" -b "$batch" -t "$threads" -ngl "$gpu"
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.