Last active
June 26, 2024 12:40
-
-
Save SteelPh0enix/e66808b99f00db22bfec951058a01c2e to your computer and use it in GitHub Desktop.
llama.cpp PowerShell utils
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collection of variables, aliases and functions to work with llama.cpp.
# Dot-source this file to activate.
# DO NOT source in a VS command prompt used for building llama.cpp; use this for building:
# https://gist.github.com/SteelPh0enix/8651ed5a6ea571b1cd11b8c9fa47ac47

# HARDCODED VALUES - MAKE SURE TO TUNE THEM FOR YOUR SYSTEM!
$Env:ROCM_VERSION = "5.7.1"
$Env:USE_ROCM = 1
$Env:HIP_PLATFORM = "amd"
$Env:GPU_ARCHS = "gfx1100"
$Env:HSA_OVERRIDE_GFX_VERSION = "11.0.0"
$Env:TF_PYTHON_VERSION = "3.12"

# HIP_PATH should be set by the ROCm installer; warn if it is missing so the
# derived ROCM_PATH below does not silently end up empty.
if (-not $Env:HIP_PATH) {
    Write-Warning "HIP_PATH is not set - ROCM_PATH will be empty. Is ROCm installed?"
}
$Env:ROCM_PATH = $Env:HIP_PATH
# Mirror the GPU architecture into every variable the various toolchains expect.
$Env:GFX_ARCH = $Env:GPU_ARCHS
$Env:AMDGPU_TARGETS = $Env:GPU_ARCHS
$Env:PYTORCH_ROCM_ARCH = $Env:GPU_ARCHS
$Env:TF_ROCM_AMDGPU_TARGETS = $Env:GPU_ARCHS

# llama.cpp-related variables (tweak if necessary)
$Env:LLAMA_CPP_PATH = "F:/AI/llama.cpp"
$Env:LLAMA_CPP_SERVER_ADDRESS = "localhost"
$Env:LLAMA_CPP_SERVER_PORT = "51536"
$Env:LLAMA_CPP_SERVER_URL = "http://${Env:LLAMA_CPP_SERVER_ADDRESS}:${Env:LLAMA_CPP_SERVER_PORT}/"
$Env:LLAMA_CPP_SERVER_CTX_SIZE = "10240"
$Env:LLAMA_CPP_SERVER_GPU_LAYERS = 999

# Expose llama.cpp build binaries on PATH and the gguf-py package to Python.
$Env:PATH = "${Env:PATH};${Env:LLAMA_CPP_PATH}/build/bin"
$Env:PYTHONPATH = "${Env:LLAMA_CPP_PATH}/gguf-py;${Env:PYTHONPATH}"

# System-related variables
$logical_cores_amount = (Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors

# generic llm-related functions
Function llm-server($model_path) {
    # Start llama-server for the given GGUF model file, using all logical CPU
    # cores and the context size / GPU layer count / port configured via the
    # LLAMA_CPP_SERVER_* environment variables.
    if (-not $model_path) {
        Write-Error "llm-server: model path argument is required"
        return
    }
    Write-Host "Running $model_path using llama-server @ $Env:LLAMA_CPP_SERVER_URL w/ $logical_cores_amount CPU cores, $Env:LLAMA_CPP_SERVER_GPU_LAYERS GPU layers, and $Env:LLAMA_CPP_SERVER_CTX_SIZE context size"
    llama-server --threads $logical_cores_amount --mlock --gpu-layers $Env:LLAMA_CPP_SERVER_GPU_LAYERS --ctx-size $Env:LLAMA_CPP_SERVER_CTX_SIZE --port $Env:LLAMA_CPP_SERVER_PORT --flash-attn --model $model_path
}
# llama.cpp management functions
Function llm-llama-clone {
    # Clone the llama.cpp repository (with submodules) into $Env:LLAMA_CPP_PATH.
    Write-Host "Pulling llama.cpp repository to $Env:LLAMA_CPP_PATH"
    git clone git@github.com:ggerganov/llama.cpp.git $Env:LLAMA_CPP_PATH
    # Push-Location/Pop-Location in try/finally guarantees the caller's working
    # directory is restored even if the submodule update fails.
    Push-Location $Env:LLAMA_CPP_PATH
    try {
        git submodule update --init --recursive
    }
    finally {
        Pop-Location
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment