Skip to content

Instantly share code, notes, and snippets.

@4dh
Last active December 30, 2023 19:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Save 4dh/220542d666329bbad2bb55b2202d5add to your computer and use it in GitHub Desktop.
LlamaCPP sample- GRDN.AI
# import libraries
from llama_index import (
SimpleDirectoryReader,
VectorStoreIndex,
ServiceContext,
)
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
messages_to_prompt,
completion_to_prompt,
)
# Reference download URL for a Llama 2 chat model.
# NOTE(review): this variable is never used below — the model is loaded from
# `model_path`. It also points at a *13B GGML* build while `model_path` is a
# *7B GGUF* file; confirm which model this sample is meant to demonstrate.
# Kept so any external reader/importer relying on the name still works.
model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin"

# Local path to the pre-downloaded GGUF model file (machine-specific;
# adjust for your environment).
model_path = "/Users/dheym/Library/CloudStorage/OneDrive-Personal/Documents/side_projects/GRDN/src/models/llama-2-7b-chat.Q4_K_M.gguf"

# Initialize LlamaCPP with a local model for natural language processing.
llm = LlamaCPP(
    # Local path to the pre-downloaded Llama model.
    model_path=model_path,
    # Low temperature for mostly-deterministic generation.
    temperature=0.1,
    # Cap on generated tokens (1000 allows fairly long responses).
    max_new_tokens=1000,
    # Context window set below the model's maximum to avoid token-limit issues.
    context_window=3000,
    # Extra generation kwargs can be passed here if needed.
    generate_kwargs={},
    # Model init kwargs, including GPU-layer offload.
    # NOTE(review): n_gpu_layers=1 offloads a single layer; on an M2 Max a
    # larger value is typically used — confirm against llama.cpp docs.
    model_kwargs={"n_gpu_layers": 1},
    # Prompt-formatting helpers for Llama-2 chat compatibility.
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    # Verbose logging for development and debugging.
    verbose=True,
)

# Run a single completion and print the generated text.
response = llm.complete(
    "Hello! tell me 3 short, concise bullet points about companion planting."
)
print(response.text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment