Skip to content

Instantly share code, notes, and snippets.

@ealmloff
Created February 28, 2024 18:51
Show Gist options
  • Save ealmloff/3398d172180fa783f043b4a26960b19a to your computer and use it in GitHub Desktop.
use kalosm::language::*;
/// Interactive chat REPL backed by a locally stored Llama-family model.
///
/// Loads model weights and tokenizer from local paths, configures the model
/// for a Mistral-style chat format, then loops forever: read a line from the
/// user, stream the assistant's reply to stdout.
///
/// Errors from model construction, prompting, or streaming are propagated out
/// of `main` (exiting with a non-zero status and a printed error) instead of
/// panicking through `unwrap`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Describe where the model weights and tokenizer live on disk.
    let local_source = LlamaSource::new(
        FileSource::Local("path/to/llama/model".into()),
        FileSource::Local("path/to/llama/tokenizer.json".into()),
    )
    .with_group_query_attention(
        // 1 for llama, 8 for mistral
        8,
    )
    // If this is a chat model, you can set the chat markers it uses here
    .with_chat_markers(ChatMarkers {
        system_prompt_marker: "<s>[INST] ",
        end_system_prompt_marker: " [/INST]",
        user_marker: "[INST] ",
        end_user_marker: " [/INST]",
        assistant_marker: "",
        end_assistant_marker: "</s>",
    });

    // You can set the source of the model with the `with_source` method.
    let mut model = Llama::builder().with_source(local_source).build()?;
    let mut chat = Chat::builder(&mut model)
        .with_system_prompt("The assistant will act like a pirate")
        .build();

    // REPL: prompt the user, then stream the model's response to stdout.
    // Any failure (e.g. stdin closed, generation error) exits via `?`.
    loop {
        chat.add_message(prompt_input("\n> ")?)
            .await?
            .to_std_out()
            .await?;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment