Skip to content

Instantly share code, notes, and snippets.

@bdashore3
Last active October 4, 2023 03:53
Show Gist options
  • Save bdashore3/581f5ec20c04fdb26298fd843756436b to your computer and use it in GitHub Desktop.
Save bdashore3/581f5ec20c04fdb26298fd843756436b to your computer and use it in GitHub Desktop.
GGUF quant script
#!/usr/bin/env python3
import argparse
import os
import subprocess
def main(
    model,
    outbase,
    outdir,
    llamabase=r"F:\AI\ggml\llama-cpp",
    llamabuild=r"F:\AI\ggml\llama-cpp-build",
    quant_types=("q8_0",),
):
    """Convert a model directory to an fp16 GGUF and quantise it.

    Runs ``convert.py`` from ``llamabase`` to produce
    ``{outdir}/{outbase}.fp16.gguf`` (skipped when that file already exists),
    then runs the ``quantize`` binary from ``llamabuild`` once per entry in
    ``quant_types``, writing ``{outdir}/{outbase}.{quant_type}.gguf``.
    The intermediate fp16 file is deleted afterwards.

    Args:
        model: Path to the model directory; must contain ``config.json``.
        outbase: Base file name (without extension) for the output files.
        outdir: Directory for the output files; created if missing.
        llamabase: Directory containing llama.cpp's ``convert.py``.
        llamabuild: Directory containing the built ``quantize`` executable.
        quant_types: Quantisation type names passed to ``quantize``,
            e.g. ``("q4_K_S", "q5_K_M", "q6_K", "q8_0")``.

    Raises:
        FileNotFoundError: If ``model`` is not a directory or has no
            ``config.json``.
        subprocess.CalledProcessError: If a conversion or quantisation
            command exits with a non-zero status.
    """
    if not os.path.isdir(model):
        raise FileNotFoundError(f"Could not find model dir at {model}")
    if not os.path.isfile(os.path.join(model, "config.json")):
        raise FileNotFoundError(f"Could not find config.json in {model}")

    os.makedirs(outdir, exist_ok=True)

    # NOTE: this script does not build llama.cpp; the tools under
    # `llamabase` / `llamabuild` are expected to exist already.

    fp16 = f"{outdir}/{outbase}.fp16.gguf"
    print(f"Making unquantised GGUF at {fp16}")
    if not os.path.isfile(fp16):
        # Argument list (no shell) so paths containing spaces or shell
        # metacharacters are passed through intact.
        subprocess.run(
            [
                "python",
                os.path.join(llamabase, "convert.py"),
                model,
                "--outtype",
                "f16",
                "--outfile",
                fp16,
            ],
            check=True,
        )
    else:
        print(f"Unquantised GGUF already exists at: {fp16}")

    print("Making quants")
    quantize_exe = os.path.join(llamabuild, "quantize")
    for quant_type in quant_types:
        outfile = f"{outdir}/{outbase}.{quant_type}.gguf"
        print(f"Making {quant_type} : {outfile}")
        subprocess.run([quantize_exe, fp16, outfile, quant_type], check=True)

    # The fp16 file is only an intermediate; remove it once every quant has
    # been produced (this also removes a pre-existing fp16 file that was reused).
    os.remove(fp16)
if __name__ == "__main__":
    # Command-line entry point: three positional arguments forwarded to main().
    parser = argparse.ArgumentParser(
        description="Convert a model directory to GGUF and quantize it with llama.cpp."
    )
    parser.add_argument("model", help="Model directory")
    parser.add_argument("outbase", help="Output base name")
    parser.add_argument("outdir", help="Output directory")
    args = parser.parse_args()
    main(args.model, args.outbase, args.outdir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment