Last active
May 21, 2024 13:46
-
-
Save tristandruyen/941d2e0526e4aedfa026e4e53411a4dc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env fish | |
# Invoke like './autoquant-hf-bf16.fish meta-llama/Meta-Llama-3-8B-Instruct' | |
# TODOS: | |
# TODO Just a rough outline of an automatic script at the moment — mostly example commands, no real automation yet; needs parameters etc.
# TODO Create hf repo automatically using CLI | |
# TODO Select good quant types | |
# TODO Decide repo name template (IQ-Imatrix, iMat.GGUF and more seem to be in use) | |
# TODO Parallelize upload while still quanting | |
function run_cmd
    # Run a command via eval, appending both the command line itself
    # (wrapped in ":: … ::" markers) and its combined stdout+stderr to
    # the shared autoquant log.
    set tools_path ~/ai/tools
    set log_file $tools_path/autoquant.log
    echo :: $argv :: &>> $log_file
    eval $argv &>> $log_file
end
# Expect exactly one "org/model" argument, e.g.:
#   ./autoquant-hf-bf16.fish meta-llama/Meta-Llama-3-8B-Instruct
if test (count $argv) -eq 0
    echo "Usage: "(status filename)" <org>/<model-name>" >&2
    exit 1
end
set hf_path (string split / $argv[1])
set model_org $hf_path[1]
set model_name $hf_path[2]
if test -z "$model_org" -o -z "$model_name"
    echo "Error: argument must be of the form <org>/<model-name>" >&2
    exit 1
end

set tools_path ~/ai/tools
set llama_cpp_path ~/ai/tools/llama.cpp
set model_path ~/ai/models
set clone_path $model_path/$model_name
set out_path $model_path/$model_name-iMat-GGUF

# Pipeline: Base ⇢ GGUF(BF16) ⇢ Imatrix-Data(BF16) ⇢ GGUF(Imatrix-Quants)
echo "Running $model_org/$model_name==========================================="
echo "Cloning $model_org/$model_name"
echo "Cloning to $clone_path"
# GIT_LFS_SKIP_SMUDGE=1: clone metadata only; the large weight files are
# fetched explicitly with `git lfs pull` below.
run_cmd GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/$model_org/$model_name $clone_path
echo "Pulling lfs files"
run_cmd cd $clone_path
run_cmd "git lfs pull"

echo "Converting to bf16 gguf"
echo "Writing result files to $out_path"
# -p: tolerate an already-existing output dir so re-runs don't abort here.
run_cmd mkdir -p $out_path
run_cmd cd $out_path
set base_quant_name (string lower $model_name)-bf16.gguf
run_cmd nix develop $llama_cpp_path#default-extra --command \
    python3 $llama_cpp_path/convert-hf-to-gguf.py --outtype bf16 \
    --outfile $out_path/$base_quant_name \
    $clone_path
echo .

echo "Generate imatrix"
run_cmd nix run $llama_cpp_path#imatrix -- \
    -c 512 -t 12 \
    -m $out_path/$base_quant_name \
    -f $llama_cpp_path/groups_merged.txt \
    -o $out_path/imat-bf16-gmerged.dat
echo .

# One quantization pass per target type, all reusing the same bf16 base
# model and imatrix data.
for quant in IQ1_S IQ2_M IQ3_M Q4_K_M Q5_K_M Q6_K IQ4_NL IQ2_S IQ2_XS IQ2_XXS IQ3_S IQ3_XS IQ3_XXS IQ4_XS Q4_K_S Q5_K_S Q8_0 Q4_0
    echo "=================================================================="
    set out_name (string lower $model_name)-imat-$quant.gguf
    echo Quantizing $out_name
    run_cmd nix run $llama_cpp_path#quantize -- \
        --imatrix $out_path/imat-bf16-gmerged.dat \
        $out_path/$base_quant_name \
        $out_path/$out_name \
        $quant
    echo Done with $out_name
end

cd $tools_path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env fish
# Split GGUF files that exceed Hugging Face's 50 GB per-file upload limit
# into <= 48 GB shards with llama.cpp's gguf-split tool.
# TODOS:
# TODO auto-detect files over 50GB to split instead of listing quants
# TODO parameterize/auto-detect model name

# Split the f16 base model, then delete the now-redundant un-split source.
# NOTE(review): the original removed "…-imat-$quant.split" here, but $quant
# is undefined outside the loop and gguf-split writes shards named
# "<prefix>-NNNNN-of-NNNNN.gguf" (no bare "<prefix>.split" file exists), so
# removing the un-split source .gguf was almost certainly the intent — confirm.
nix run ../../tools/llama.cpp#gguf-split -- --split-max-size 48G ./c4ai-command-r-plus-f16.gguf ./c4ai-command-r-plus-f16.split && \
rm ./c4ai-command-r-plus-f16.gguf

for quant in IQ4_NL IQ4_XS Q4_0 Q4_K_M Q4_K_S Q5_K_M Q5_K_S Q6_K Q8_0
    # Split each quantized file, then remove its un-split source on success.
    nix run ../../tools/llama.cpp#gguf-split -- --split-max-size 48G ./c4ai-command-r-plus-imat-$quant.gguf ./c4ai-command-r-plus-imat-$quant.split && \
    rm ./c4ai-command-r-plus-imat-$quant.gguf
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment