#!/usr/bin/env fish
# Invoke like './autoquant-hf-bf16.fish meta-llama/Meta-Llama-3-8B-Instruct'
# TODOS:
# TODO This is just a rough outline of an automation script for now: mostly example commands, no real automation yet; needs proper parameter handling etc.
# TODO Create the HF repo automatically via the CLI (see the commented sketch below)
# TODO Select a good set of quant types
# TODO Decide on a repo name template (IQ-Imatrix, iMat.GGUF and more seem to be in use)
# TODO Parallelize uploads while still quantizing (see the commented sketch after the quant loop)
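# A possible approach for the repo-creation TODO -- an untested sketch, assuming
# huggingface-cli is installed and logged in (it would have to run after
# $model_name is set below; the repo name template is still undecided):
#   huggingface-cli repo create $model_name-iMat-GGUF --type model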
function run_cmd
    set tools_path ~/ai/tools
    # Log the command itself, then its combined stdout/stderr
    echo :: $argv :: &>> $tools_path/autoquant.log
    eval $argv &>> $tools_path/autoquant.log
end
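# Note: because run_cmd evals its arguments, env-var prefixes pass through it,
# e.g. `run_cmd GIT_LFS_SKIP_SMUDGE=1 git clone ...` (fish >= 3.1), and
# `run_cmd cd $some_path` changes the script's working directory.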
# Split the 'org/name' argument, e.g. meta-llama/Meta-Llama-3-8B-Instruct
set hf_path (string split / $argv)
set model_org $hf_path[1]
set model_name $hf_path[2]
set tools_path ~/ai/tools
set llama_cpp_path ~/ai/tools/llama.cpp
set model_path ~/ai/models
set clone_path $model_path/$model_name
set out_path $model_path/$model_name-iMat-GGUF
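# For the example invocation above this resolves to:
#   model_org  = meta-llama
#   model_name = Meta-Llama-3-8B-Instruct
#   clone_path = ~/ai/models/Meta-Llama-3-8B-Instruct
#   out_path   = ~/ai/models/Meta-Llama-3-8B-Instruct-iMat-GGUF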
# Imatrix GGUF quants for $model_name
# Pipeline: Base ⇢ GGUF(BF16) ⇢ Imatrix-Data(BF16) ⇢ GGUF(Imatrix-Quants)
echo "Running $model_org/$model_name==========================================="
echo "Cloning $model_org/$model_name"
echo "Cloning to $clone_path"
run_cmd GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/$model_org/$model_name $clone_path
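# GIT_LFS_SKIP_SMUDGE=1 clones only the repo metadata and LFS pointer files;
# the actual weight files are fetched afterwards with `git lfs pull`.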
echo "Pulling lfs files"
run_cmd cd $clone_path
run_cmd "git lfs pull"
echo "Converting to bf16 gguf"
echo "Writing result files to $out_path"
run_cmd mkdir -p $out_path
run_cmd cd $out_path
set base_quant_name (string lower $model_name)-bf16.gguf
run_cmd nix develop $llama_cpp_path#default-extra --command \
    python3 $llama_cpp_path/convert-hf-to-gguf.py --outtype bf16 \
    --outfile $out_path/$base_quant_name \
    $clone_path
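# convert-hf-to-gguf.py (run inside the llama.cpp dev shell for its Python
# dependencies) produces a full-precision BF16 GGUF that serves as the source
# for all quantized variants below.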
echo .
echo "Generate imatrix"
run_cmd nix run $llama_cpp_path#imatrix -- \
    -c 512 -t 12 \
    -m $out_path/$base_quant_name \
    -f $llama_cpp_path/groups_merged.txt \
    -o $out_path/imat-bf16-gmerged.dat
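# The importance matrix records activation statistics over the calibration
# data (groups_merged.txt; -c 512 context, -t 12 threads); the quantizer uses
# it to preserve the most influential weights at low bit-widths.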
echo .
for quant in IQ1_S IQ2_M IQ3_M Q4_K_M Q5_K_M Q6_K IQ4_NL IQ2_S IQ2_XS IQ2_XXS IQ3_S IQ3_XS IQ3_XXS IQ4_XS Q4_K_S Q5_K_S Q8_0 Q4_0
    echo "=================================================================="
    set out_name (string lower $model_name)-imat-$quant.gguf
    echo Quantizing $out_name
    run_cmd nix run $llama_cpp_path#quantize -- \
        --imatrix $out_path/imat-bf16-gmerged.dat \
        $out_path/$base_quant_name \
        $out_path/$out_name \
        $quant
    echo Done with $out_name
end
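# A possible approach for the parallel-upload TODO -- an untested sketch, meant
# to go inside the loop above right after each quant finishes. Assumes a target
# repo already exists (<your-org> is a placeholder; the name template is an
# open TODO) and that huggingface-cli is logged in; the trailing & lets the
# upload run while the next quant starts:
#   huggingface-cli upload <your-org>/$model_name-iMat-GGUF $out_path/$out_name $out_name &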
cd $tools_path

#!/usr/bin/env fish
# TODOS:
# TODO Auto-detect files over 50GB to split instead of hard-coding the quant list (see the commented sketch below)
# TODO Parameterize/auto-detect the model name
# Split the f16 base model, then remove the unsplit original once splitting succeeded
nix run ../../tools/llama.cpp#gguf-split -- --split-max-size 48G ./c4ai-command-r-plus-f16.gguf ./c4ai-command-r-plus-f16.split && \
rm ./c4ai-command-r-plus-f16.gguf
for quant in IQ4_NL IQ4_XS Q4_0 Q4_K_M Q4_K_S Q5_K_M Q5_K_S Q6_K Q8_0
    # Split each quant that exceeds the HF 50GB file limit, then remove the unsplit original
    nix run ../../tools/llama.cpp#gguf-split -- --split-max-size 48G ./c4ai-command-r-plus-imat-$quant.gguf ./c4ai-command-r-plus-imat-$quant.split && \
    rm ./c4ai-command-r-plus-imat-$quant.gguf
end
end
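# A possible approach for the auto-detect TODO -- an untested sketch, assuming
# GNU find; it splits any GGUF in the current directory larger than ~48GiB:
#   for f in (find . -maxdepth 1 -name '*.gguf' -size +48G)
#       nix run ../../tools/llama.cpp#gguf-split -- --split-max-size 48G $f (string replace -r '\.gguf$' '.split' $f)
#       and rm $f
#   end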