|
from llama_cpp import Llama, LlamaRAMCache |
|
|
|
import outlines |
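
# Minimal streaming chat loop: an outlines prompt template renders the running
# transcript in ChatML, and llama-cpp-python runs a local GGUF model, streaming
# each assistant reply to stdout.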
|
|
|
# The prompt template below embeds two truncated example transcript snippets so the
# model sees the expected ChatML layout before the real conversation begins.
|
|
|
@outlines.prompt |
|
def generate_prompt(messages, nudge_role="assistant"):
|
""" |
|
Below is the transcript of a conversation between a chatbot and a human. \

The chatbot is based on an instruction fine-tuned LLM (Large Language Model), \

and is able to follow instructions closely and intelligently.
|
|
|
The transcript is formatted in ChatML, a standard text markup \

designed specifically for chatbot applications. The format makes use of special markup \

such as <|im_start|> and <|im_end|>. Also notice the "role", which may be \

"system", "user", or "assistant". System messages have higher priority than \

user messages when the chatbot decides which instructions to follow.
|
|
|
Begin example transcript snippet 1 (two turns, assistant persona, markdown formatting): |
|
<|im_start|>system |
|
You are a helpful AI assistant. |
|
<|im_end|> |
|
<|im_start|>user |
|
Hi there! Who are you? |
|
<|im_end|> |
|
<|im_start|>assistant |
|
Hello! I am an AI assistant at your service, ready to help you with any questions \

or requests, so feel free to ask me anything. Let's get started!
|
<|im_end|> |
|
<|im_start|>user |
|
Suggest a travel plan for my holiday to Tokyo with 4 other classmates, \ |
|
flash style travel at only 3 days. We are college students in Taiwan. |
|
<|im_end|> |
|
<|im_start|>assistant |
|
# 3 Days in Tokyo: A Flashy Adventure with Friends! |
|
|
|
As college students from Taiwan, you're in for a thrilling adventure in Tokyo with your friends! With only 3 days, we'll pack in the best of Tokyo, covering its vibrant culture, mouth-watering food, and iconic landmarks. |
|
|
|
## Day 1: Explore Shinjuku & Shibuya |
|
|
|
- **9:00 AM - 10:00 AM:** Breakfast |
|
|
|
Start your day with a traditional Japanese breakfast at a local eatery in Shinjuku's Golden Gai. Try their signature breakfast dishes like tamagoyaki (Japanese-style omelette) and miso soup. |
|
|
|
- **10:00 AM - 12:00 PM:** Explore Shinjuku Gyoen National Garden |
|
|
|
Take a stroll through this beautiful park, featuring a harmonious blend of Japanese, French, and English garden styles. |
|
[End example transcript snippet 1] |
|
|
|
Begin example transcript snippet 2 (codeblock formatting): |
|
<|im_start|>user |
|
Show me the code for a Hello world Node JS program. |
|
<|im_end|> |
|
<|im_start|>assistant |
|
Here's a trivial hello world program in Node JS: |
|
```js |
|
const n = 1 + 1 |
|
console.log("Hello world: " + n) |
|
``` |
|
|
|
To run the program, execute this in your terminal (assuming the file above is saved as `main.js`):
|
``` |
|
node main.js |
|
``` |
|
|
|
It will then print the result of calculating `1 + 1` (alongside the greeting) to standard output.
|
<|im_end|> |
|
[End example transcript snippet 2] |
|
|
|
Begin transcript: |
|
{% for m in messages %} |
|
<|im_start|>{{ m['role'] }} |
|
{{ m['content'] }} |
|
<|im_end|> |
|
{% endfor %} |
|
<|im_start|>{{ nudge_role }} |
|
|
|
""" |
|
|
|
# Local GGUF snapshot of OLMo 1.7 7B (Q6_K quantization); adjust to your own path.
LOCAL_PATH = "/home/zeus/.cache/huggingface/hub/models--mradermacher--OLMo-1.7-7B-hf-GGUF/snapshots/dc7cb3087cefdf63bb5ec8654563e403681406d4/OLMo-1.7-7B-hf.Q6_K.gguf"
|
|
|
context_length = 2048  # token budget shared by the rendered prompt and the generated reply
|
|
|
stop_tokens = ["<|im_end|>"]  # stop generating once the model closes its ChatML turn
|
|
|
llm = Llama(model_path=LOCAL_PATH, n_ctx=context_length) |
|
llm.set_cache(LlamaRAMCache()) |
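# LlamaRAMCache keeps evaluated llama.cpp state in memory; when a new prompt shares a
# prefix with a cached one (as the growing transcript does each turn), that prefix can
# be reused instead of being re-evaluated from scratch.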
|
|
|
messages = [ |
|
{ |
|
"role": "system", |
|
"content": "You are a helpful AI assistant. You always answer user questions and carry out tasks on their behalf. You have a bias for action, but will accept user feedback. Unless otherwise instructed, you use markdown format in your reply." |
|
} |
|
] |
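
# The transcript starts with only the system message; each iteration of the loop below
# appends the user's turn, streams the assistant's reply, and appends that reply as well.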
|
|
|
while True: |
|
    user_q = input("User > ")

    if user_q.strip().lower() in ("exit", "quit"):
        break  # make the farewell print at the bottom reachable

    messages.append({ "role": "user", "content": user_q })
|
    # Render the full transcript into a ChatML prompt and stream the assistant's reply.
    llm_stream = llm(prompt=generate_prompt(messages), max_tokens=400, stop=stop_tokens, stream=True)
|
response = "" |
|
    for t in llm_stream:

        u = t["choices"][0]["text"]

        response += u

        print(u, end='', flush=True)

    print()  # end the streamed line before the next "User > " prompt

    messages.append({ "role": "assistant", "content": response })
|
|
|
print("Bye") |