Last active
May 21, 2024 13:46
-
-
Save tristandruyen/941d2e0526e4aedfa026e4e53411a4dc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env fish | |
# Invoke like './autoquant-hf-bf16.fish meta-llama/Meta-Llama-3-8B-Instruct' | |
# TODOS: | |
# TODO Just a rough outline of an automatic script at the moment — mostly example commands, no real automation yet; needs parameters etc.
# TODO Create hf repo automatically using CLI | |
# TODO Select good quant types | |
# TODO Decide repo name template (IQ-Imatrix, iMat.GGUF and more seem to be in use) | |
# TODO Parallelize upload while still quanting | |
function run_cmd
    # Run a command via eval, appending both the command line itself
    # (wrapped in ":: … ::" markers) and its combined stdout+stderr to
    # the shared autoquant log.
    set tools_path ~/ai/tools
    set log_file $tools_path/autoquant.log
    echo :: $argv :: &>> $log_file
    eval $argv &>> $log_file
end
# Expect exactly one "org/model" argument, e.g.:
#   ./autoquant-hf-bf16.fish meta-llama/Meta-Llama-3-8B-Instruct
if test (count $argv) -eq 0
    echo "Usage: "(status filename)" <org>/<model-name>" >&2
    exit 1
end
set hf_path (string split / $argv[1])
set model_org $hf_path[1]
set model_name $hf_path[2]
if test -z "$model_org" -o -z "$model_name"
    echo "Error: argument must be of the form <org>/<model-name>" >&2
    exit 1
end

set tools_path ~/ai/tools
set llama_cpp_path ~/ai/tools/llama.cpp
set model_path ~/ai/models
set clone_path $model_path/$model_name
set out_path $model_path/$model_name-iMat-GGUF

# Pipeline: Base ⇢ GGUF(BF16) ⇢ Imatrix-Data(BF16) ⇢ GGUF(Imatrix-Quants)
echo "Running $model_org/$model_name==========================================="
echo "Cloning $model_org/$model_name"
echo "Cloning to $clone_path"
# GIT_LFS_SKIP_SMUDGE=1: clone metadata only; the large weight files are
# fetched explicitly with `git lfs pull` below.
run_cmd GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/$model_org/$model_name $clone_path
echo "Pulling lfs files"
run_cmd cd $clone_path
run_cmd "git lfs pull"

echo "Converting to bf16 gguf"
echo "Writing result files to $out_path"
# -p: tolerate an already-existing output dir so re-runs don't abort here.
run_cmd mkdir -p $out_path
run_cmd cd $out_path
set base_quant_name (string lower $model_name)-bf16.gguf
run_cmd nix develop $llama_cpp_path#default-extra --command \
    python3 $llama_cpp_path/convert-hf-to-gguf.py --outtype bf16 \
    --outfile $out_path/$base_quant_name \
    $clone_path
echo .

echo "Generate imatrix"
run_cmd nix run $llama_cpp_path#imatrix -- \
    -c 512 -t 12 \
    -m $out_path/$base_quant_name \
    -f $llama_cpp_path/groups_merged.txt \
    -o $out_path/imat-bf16-gmerged.dat
echo .

# One quantization pass per target type, all reusing the same bf16 base
# model and imatrix data.
for quant in IQ1_S IQ2_M IQ3_M Q4_K_M Q5_K_M Q6_K IQ4_NL IQ2_S IQ2_XS IQ2_XXS IQ3_S IQ3_XS IQ3_XXS IQ4_XS Q4_K_S Q5_K_S Q8_0 Q4_0
    echo "=================================================================="
    set out_name (string lower $model_name)-imat-$quant.gguf
    echo Quantizing $out_name
    run_cmd nix run $llama_cpp_path#quantize -- \
        --imatrix $out_path/imat-bf16-gmerged.dat \
        $out_path/$base_quant_name \
        $out_path/$out_name \
        $quant
    echo Done with $out_name
end

cd $tools_path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env fish
# Split GGUF files that exceed Hugging Face's 50 GB per-file upload limit
# into <= 48 GB shards with llama.cpp's gguf-split tool.
# TODOS:
# TODO auto-detect files over 50GB to split instead of listing quants
# TODO parameterize/auto-detect model name

# Split the f16 base model, then delete the now-redundant un-split source.
# NOTE(review): the original removed "…-imat-$quant.split" here, but $quant
# is undefined outside the loop and gguf-split writes shards named
# "<prefix>-NNNNN-of-NNNNN.gguf" (no bare "<prefix>.split" file exists), so
# removing the un-split source .gguf was almost certainly the intent — confirm.
nix run ../../tools/llama.cpp#gguf-split -- --split-max-size 48G ./c4ai-command-r-plus-f16.gguf ./c4ai-command-r-plus-f16.split && \
rm ./c4ai-command-r-plus-f16.gguf

for quant in IQ4_NL IQ4_XS Q4_0 Q4_K_M Q4_K_S Q5_K_M Q5_K_S Q6_K Q8_0
    # Split each quantized file, then remove its un-split source on success.
    nix run ../../tools/llama.cpp#gguf-split -- --split-max-size 48G ./c4ai-command-r-plus-imat-$quant.gguf ./c4ai-command-r-plus-imat-$quant.split && \
    rm ./c4ai-command-r-plus-imat-$quant.gguf
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment