llama.cpp shell utils
#!/bin/zsh
# Collection of variables, aliases and functions to work w/ llama.cpp
# Source to activate.
# HARDCODED VALUES - MAKE SURE TO TUNE THEM FOR YOUR SYSTEM!
export ROCM_VERSION="6.0.2"
export USE_ROCM=1
export HIP_PLATFORM="amd"
export GPU_ARCHS="gfx1100"
export HSA_OVERRIDE_GFX_VERSION="11.0.0"
export ROCM_PATH="/opt/rocm"
export TF_PYTHON_VERSION="3.12"
export USE_SYMENGINE=1
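# The values above target an RDNA3 card (gfx1100, e.g. RX 7900 XT/XTX).
# For a different GPU the pair would likely differ - e.g. an RDNA2 card
# (RX 6800/6900 series) would typically use GPU_ARCHS="gfx1030" and
# HSA_OVERRIDE_GFX_VERSION="10.3.0".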
# llama.cpp-related variables (tweak if necessary)
export LLAMA_CPP_PATH="${HOME}/llama.cpp"
export LLAMA_CPP_SERVER_ADDRESS="localhost"
export LLAMA_CPP_SERVER_PORT="51536"
export LLAMA_CPP_SERVER_URL="http://${LLAMA_CPP_SERVER_ADDRESS}:${LLAMA_CPP_SERVER_PORT}/"
export LLAMA_CPP_SERVER_CTX_SIZE=10240
export LLAMA_CPP_SERVER_BATCH_SIZE=4096
export LLAMA_CPP_SERVER_UBATCH_SIZE=2048
export LLAMA_CPP_SERVER_GPU_LAYERS=999
# test with LLAMA_HIP_UMA=ON in systems with iGPUs to check out how it behaves
export LLAMA_CPP_CMAKE_ARGS="-DGGML_NATIVE=ON -DGGML_HIPBLAS=ON -DGGML_CCACHE=OFF -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_OPENMP=ON -DAMDGPU_TARGETS=${GPU_ARCHS} -DGGML_CUDA_DMMV_X=128 -DGGML_CUDA_MMV_Y=4 -DLLAMA_CURL=ON -DLLAMA_STANDALONE=ON"
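# On a system with an iGPU/APU you could try the UMA build mentioned above by
# appending that flag to the arguments before building, e.g.:
#   export LLAMA_CPP_CMAKE_ARGS="${LLAMA_CPP_CMAKE_ARGS} -DLLAMA_HIP_UMA=ON"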
export LLAMA_CPP_DEFAULT_PROMPT="You are an AI assistant that follows instruction extremely well. Help as much as you can.\n"
export PATH="${PATH}:${LLAMA_CPP_PATH}/build/bin"
export PYTHONPATH="${PYTHONPATH}:${LLAMA_CPP_PATH}/gguf-py"
# System-related variables
export NUMBER_OF_CORES=$(grep -c ^processor /proc/cpuinfo)
# ROCm-related variables
export GFX_ARCH=$GPU_ARCHS
export PYTORCH_ROCM_ARCH=$GPU_ARCHS
export TF_ROCM_AMDGPU_TARGETS=$GPU_ARCHS
export ROCM_INSTALL_DIR=$ROCM_PATH
export ROCM_TOOLKIT_PATH=$ROCM_PATH
export HIP_PATH=$ROCM_PATH
export HIPCXX="${ROCM_PATH}/llvm/bin/clang"
export PATH="${PATH}:${HIP_PATH}"
export CPATH="${CPATH}:${ROCM_PATH}/include"
export C_INCLUDE_PATH="${C_INCLUDE_PATH}:${ROCM_PATH}/include"
export CPLUS_INCLUDE_PATH="${CPLUS_INCLUDE_PATH}:${ROCM_PATH}/include"
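# Quick sanity check after sourcing this file (assumes the ROCm tools are
# installed under ${ROCM_PATH}):
#   rocminfo | grep -i gfx   # should list the target architecture (gfx1100)
#   hipconfig --version      # should print the HIP/ROCm version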
# generic llm-related functions
function llm-server() {
    echo "Running $1 using llama-server @ ${LLAMA_CPP_SERVER_URL} w/ ${NUMBER_OF_CORES} CPU cores, ${LLAMA_CPP_SERVER_GPU_LAYERS} GPU layers, and ${LLAMA_CPP_SERVER_CTX_SIZE} context size"
    llama-server --color --log-enable --batch-size ${LLAMA_CPP_SERVER_BATCH_SIZE} --ubatch-size ${LLAMA_CPP_SERVER_UBATCH_SIZE} --threads ${NUMBER_OF_CORES} --mlock --gpu-layers ${LLAMA_CPP_SERVER_GPU_LAYERS} --ctx-size ${LLAMA_CPP_SERVER_CTX_SIZE} --port ${LLAMA_CPP_SERVER_PORT} --flash-attn --model "$1"
}
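# Example usage (the model path is illustrative):
#   llm-server ~/models/Meta-Llama-3-8B-Instruct.Q6_K.gguf
# Once the server is up it should respond at ${LLAMA_CPP_SERVER_URL}, e.g.:
#   curl "${LLAMA_CPP_SERVER_URL}health"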
function llm-cli() {
    local llama_prompt=${2:-${LLAMA_CPP_DEFAULT_PROMPT}}
    echo "Running $1 using llama-cli w/ ${NUMBER_OF_CORES} CPU cores, ${LLAMA_CPP_SERVER_GPU_LAYERS} GPU layers, and ${LLAMA_CPP_SERVER_CTX_SIZE} context size"
    llama-cli --color --log-enable --batch-size ${LLAMA_CPP_SERVER_BATCH_SIZE} --ubatch-size ${LLAMA_CPP_SERVER_UBATCH_SIZE} --threads ${NUMBER_OF_CORES} --mlock --gpu-layers ${LLAMA_CPP_SERVER_GPU_LAYERS} --ctx-size ${LLAMA_CPP_SERVER_CTX_SIZE} --flash-attn --interactive-first --prompt "${llama_prompt}" --model "$1"
}
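# Example usage (path and prompt are illustrative):
#   llm-cli ~/models/Meta-Llama-3-8B-Instruct.Q6_K.gguf "You are a concise coding assistant.\n"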
function llm-quantize-model() {
    local model_path=$1
    local quantization=${2:-auto}
    local output_path=${3:-.}
    # Extract the model name from the model_path
    local model_name=$(basename $model_path)
    # Check if the model_path exists
    if [ ! -d "$model_path" ]; then
        echo "Error: Model directory '$model_path' does not exist."
        return 1
    fi
    # Run the conversion command
    python $LLAMA_CPP_PATH/convert-hf-to-gguf.py --outtype $quantization --outfile $output_path/$model_name.$quantization.gguf $model_path
    # Check if the conversion was successful
    if [ $? -eq 0 ]; then
        echo "Model '$model_path' successfully quantized to $quantization format and saved as $output_path/$model_name.$quantization.gguf"
    else
        echo "Error: Failed to quantize model '$model_path'."
    fi
}
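# Example usage (paths are illustrative) - converts a HuggingFace model directory
# to GGUF with the q8_0 output type and writes it to ~/models/gguf:
#   llm-quantize-model ~/models/Meta-Llama-3-8B-Instruct q8_0 ~/models/gguf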
function llm-requantize-model() {
    local model_name=$1
    local output_quantization=$2
    local input_quantization=${3:-auto}
    local num_threads=$(/usr/bin/nproc)
    if [[ -f "${model_name}.${input_quantization}.gguf" ]]; then
        llama-quantize "${model_name}.${input_quantization}.gguf" "${model_name}.${output_quantization}.gguf" "$output_quantization" "$num_threads"
        echo "Model $model_name requantized successfully from $input_quantization to $output_quantization."
    else
        echo "Error: File ${model_name}.${input_quantization}.gguf not found."
        return 1
    fi
}
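# Example usage (assumes ./Meta-Llama-3-8B-Instruct.f16.gguf already exists,
# e.g. produced by llm-quantize-model above):
#   llm-requantize-model Meta-Llama-3-8B-Instruct Q6_K f16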
# llama.cpp management functions
function llm-llama-clone() {
    echo "Pulling llama.cpp repository to ${LLAMA_CPP_PATH}"
    git clone git@github.com:ggerganov/llama.cpp.git $LLAMA_CPP_PATH
    local og_pwd=$(pwd)
    cd $LLAMA_CPP_PATH
    git submodule update --init --recursive
    git lfs pull
    cd $og_pwd
}
function llm-llama-update() {
    echo "Pulling latest llama.cpp commit..."
    local og_pwd=$(pwd)
    cd $LLAMA_CPP_PATH
    git pull
    cd $og_pwd
}
function llm-llama-clean() {
    echo "Clearing build artifacts and junk from the llama.cpp repository..."
    local og_pwd=$(pwd)
    cd $LLAMA_CPP_PATH
    git clean -xdf
    cd $og_pwd
}
function llm-llama-clean-update() {
    llm-llama-clean
    llm-llama-update
}
function llm-llama-build() {
    local og_pwd=$(pwd)
    cd $LLAMA_CPP_PATH
    echo "Generating build files (CMake arguments: ${LLAMA_CPP_CMAKE_ARGS})"
    cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release ${(z)LLAMA_CPP_CMAKE_ARGS}
    echo "Building llama.cpp..."
    cmake --build build --config Release
    cd $og_pwd
}
function llm-llama-clean-build() {
    llm-llama-clean
    llm-llama-build
}
function llm-llama-clean-update-build() {
    llm-llama-clean
    llm-llama-update
    llm-llama-build
}
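# Typical first-time workflow (model names are illustrative):
#   llm-llama-clone
#   llm-llama-build
#   llm-quantize-model ~/models/Meta-Llama-3-8B-Instruct f16 .
#   llm-requantize-model Meta-Llama-3-8B-Instruct Q6_K f16
#   llm-server Meta-Llama-3-8B-Instruct.Q6_K.gguf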