LlamaCPP sample - GRDN.AI
# import libraries
from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
)
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
# model_url points at a GGML build that could be downloaded instead of using a
# local file; it is unused below because model_path is supplied. Note that
# recent llama-cpp-python releases only load GGUF models, so this GGML URL is
# only usable with older versions.
model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin"
model_path = "/Users/dheym/Library/CloudStorage/OneDrive-Personal/Documents/side_projects/GRDN/src/models/llama-2-7b-chat.Q4_K_M.gguf"
# Initialize LlamaCPP with a local model for natural language processing
llm = LlamaCPP(
    # Specify the local path to your pre-downloaded Llama model
    model_path=model_path,
    # Set temperature for controlled randomness in generation (0.1 for more deterministic output)
    temperature=0.1,
    # Limit the number of new tokens to generate (1000 tokens for extensive responses)
    max_new_tokens=1000,
    # Define the context window size for the model (set below the model's max to avoid token limit issues)
    context_window=3000,
    # Additional arguments for model generation can be passed here if needed
    generate_kwargs={},
    # Model initialization arguments, including GPU layer settings (may need adjusting based on hardware)
    model_kwargs={"n_gpu_layers": 1},  # For M2 Max, confirm optimal settings from documentation
    # Functions to format prompts and completions for Llama model compatibility
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    # Enable verbose logging for detailed output (useful for development and debugging)
    verbose=True,
)
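# A hedged alternative (not in the original gist): if model_path is omitted,
# LlamaCPP can instead download the weights from model_url on first run.
# Sketch only; assumes the legacy (pre-0.10) llama_index API used above and a
# llama-cpp-python build that can load the referenced model format.
# llm = LlamaCPP(
#     model_url=model_url,
#     temperature=0.1,
#     max_new_tokens=1000,
#     context_window=3000,
#     messages_to_prompt=messages_to_prompt,
#     completion_to_prompt=completion_to_prompt,
#     verbose=True,
# )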
response = llm.complete(
    "Hello! tell me 3 short, concise bullet points about companion planting."
)
print(response.text)
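The SimpleDirectoryReader, VectorStoreIndex, and ServiceContext imports above are unused in this snippet. Below is a minimal sketch of how they could wire the same LlamaCPP instance into a retrieval pipeline, assuming a hypothetical ./data directory of documents, a local embedding model (requires sentence-transformers), and the legacy pre-0.10 llama_index API shown above:

# Sketch (not in the original gist): index local documents and query them
# with the LlamaCPP instance configured above.
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
print(query_engine.query("Which plants grow well next to tomatoes?"))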