import llama_cpp
import re
import json

# Model configuration
# Tested with Mistral, Llama 2, Llama 3, and Phi-3
model_path = "/path/to/model"

# Load the model with a fixed seed and full GPU offload (n_gpu_layers=-1).
# Note: temperature is a sampling parameter in llama-cpp-python, so it is
# passed per generation call (e.g. create_completion / create_chat_completion)
# rather than to the Llama constructor.
base_llm = llama_cpp.Llama(model_path, seed=42, n_gpu_layers=-1, n_ctx=4096, verbose=False)
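# Quick usage sketch (illustrative only; the prompt and max_tokens below are
# assumptions, not part of the original gist). temperature=0.0 is supplied
# here, at generation time, to make decoding effectively deterministic.
output = base_llm(
    "Return a JSON object with a single key 'answer' whose value is 2 + 2.",
    max_tokens=64,
    temperature=0.0,
)
print(output["choices"][0]["text"])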