Skip to content

Instantly share code, notes, and snippets.

Created May 3, 2024 16:57
Show Gist options
  • Save av1d/727ba3eac2b42e661f2ebabaf1256115 to your computer and use it in GitHub Desktop.
Save av1d/727ba3eac2b42e661f2ebabaf1256115 to your computer and use it in GitHub Desktop.
Make 3 AI LLM models speak amongst themselves randomly (ollama, llama.cpp, RK3588 NPU server)
import json
import random
import re
import requests
import textwrap
# A random model is selected on every turn, but never the same one twice
# in a row, so that no model ends up replying to itself.
# ANSI escape sequences for coloring terminal output.
# Standard foreground colors use codes 30-37.
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = (
    f'\033[{code}m' for code in range(30, 38)
)
# Bright variants use codes 91-96.
(
    LIGHT_RED,
    LIGHT_GREEN,
    LIGHT_YELLOW,
    LIGHT_BLUE,
    LIGHT_MAGENTA,
    LIGHT_CYAN,
) = (f'\033[{code}m' for code in range(91, 97))
# Same escape code as WHITE (37).
LIGHT_GRAY = '\033[37m'
# Restores the terminal's default attributes.
RESET = '\033[0m'

# TCP ports of the backends, kept as strings so they can be dropped into URLs.
NPU_PORT = "31337"
LLAMA_PORT = "8080"

# Identifiers of the three backends the conversation rotates between.
server1, server2, server3 = "npu", "ollama", "llamacpp"

# The backend chosen on the previous turn (None before the first turn);
# used to avoid prompting the same backend twice in a row.
last_picked = None
def npu(query: str) -> str:
    """Send *query* to the NPU-hosted model and return its reply text.

    The query is wrapped in a ChatML-style system/user/assistant template
    and POSTed as JSON; the reply is read from the ``content`` field of the
    JSON response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the JSON response has no ``content`` field.
    """
    headers = {
        'Content-Type': 'application/json',
    }
    prefix = (
        "<|im_start|>system You are a helpful assistant. <|im_end|> "
        "<|im_start|>user "
    )
    postfix = (
        "<|im_end|><|im_start|>assistant "
    )
    # NOTE(review): only the 'input_str' entry survived in the extracted
    # source, leaving `prefix`/`postfix` unused. The two template keys below
    # are reconstructed from the RK3588 NPU server's documented request
    # format -- confirm against the server in use.
    json_data = {
        'PROMPT_TEXT_PREFIX': prefix,
        'input_str': str(query) + ' ',
        'PROMPT_TEXT_POSTFIX': postfix,
    }
    # NOTE(review): the request call itself was missing from the extracted
    # source; host and path are assumed (localhost, root path on NPU_PORT).
    response = requests.post(
        f'http://localhost:{NPU_PORT}',
        headers=headers,
        json=json_data,
    )
    # Fail loudly on HTTP errors instead of with an opaque KeyError below.
    response.raise_for_status()
    return response.json()['content']
def llamacpp(query: str) -> str:
    """Send *query* to a llama.cpp server and return the generated text.

    The prompt casts the model as "Llama", an analytically-minded thinker
    who contests claims without scientific foundations. The completion is
    requested in streaming (server-sent events) form, and the ``content``
    field of every ``data: `` event is concatenated into the reply.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # The original assigned a "friendly chatbot" prompt first and then
    # immediately overwrote it with this one; the dead assignment is dropped.
    # NOTE(review): the trailing "Llama:" cue is reconstructed -- the end of
    # the prompt was lost in the extracted source. Confirm.
    prompt_text = (
        "This is a conversation between User and Llama, an analyitically-minded "
        "thinker who deconstructs everything everyone says and analyzes it for logical fallacy. "
        "Lama refutes and contests anything which doesn't have scientific foundations. "
        f"User: {query} "
        "Llama:"
    )
    headers = {
        'Accept': 'text/event-stream',
        'Accept-Language': 'en-US,en;q=0.9,it;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
    }
    json_data = {
        'stream': True,
        'n_predict': 400,
        'temperature': 0,
        # NOTE(review): the stop-list entries were lost in the extracted
        # source; these are the llama.cpp web UI defaults for this prompt
        # template -- confirm.
        'stop': [
            '</s>',
            'Llama:',
            'User:',
        ],
        'repeat_last_n': 256,
        'repeat_penalty': 2,
        'top_k': 40,
        'top_p': 0.95,
        'min_p': 0,
        'tfs_z': 1,
        'typical_p': 1,
        'presence_penalty': 0,
        'frequency_penalty': 0,
        'mirostat': 0,
        'mirostat_tau': 5,
        'mirostat_eta': 0.1,
        'grammar': '',
        'n_probs': 0,
        'min_keep': 0,
        'image_data': [],
        'cache_prompt': True,
        'api_key': '',
        'slot_id': -1,
        'prompt': prompt_text,
    }
    # NOTE(review): the request call was missing from the extracted source;
    # host and endpoint are assumed (localhost, llama.cpp's /completion).
    response = requests.post(
        f'http://localhost:{LLAMA_PORT}/completion',
        headers=headers,
        json=json_data,
    )
    response.raise_for_status()

    # The body is a sequence of server-sent events; only lines of the form
    # "data: {json}" carry generated text.
    event_prefix = 'data: '
    pieces = []
    for line in response.content.decode('utf-8').split('\n'):
        if line.startswith(event_prefix):
            event = json.loads(line[len(event_prefix):])
            pieces.append(event['content'])
    return ''.join(pieces)
def ollama(query: str) -> str:
    """Send *query* to the local Ollama server and return the generated text.

    The ``tinyllama:latest`` model is prompted through ``/api/generate``.
    The response body is read as newline-delimited JSON, and the
    ``response`` field of every decodable line is concatenated.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if a decoded line has no ``response`` field.
    """
    headers = {
        'Content-Type': 'application/json',
    }
    payload = {
        "model": "tinyllama:latest",
        "prompt": query,
    }
    response = requests.post(
        'http://localhost:11434/api/generate',
        headers=headers,
        json=payload,
    )
    response.raise_for_status()

    pieces = []
    for line in response.content.decode('utf-8').strip().split('\n'):
        try:
            chunk = json.loads(line)
        except json.JSONDecodeError:
            # Best-effort: skip lines that are not valid JSON.
            # NOTE(review): the handler body was lost in the extracted
            # source; skipping is assumed.
            continue
        pieces.append(chunk['response'])
    return ''.join(pieces)
def trim_text(text: str, max_sentences: int = 10) -> str:
    """Return at most the first *max_sentences* sentences of *text*.

    A sentence boundary is one or more spaces that follow ``.``, ``!`` or
    ``?``. The kept sentences are re-joined with a single space; joining
    with an empty string (as before) glued them together ("One.Two!").

    Args:
        text: The text to shorten.
        max_sentences: Maximum number of sentences to keep; values below
            zero are treated as zero.

    Returns:
        The shortened text, or ``text`` unchanged apart from boundary-space
        normalization when it has no more than *max_sentences* sentences.
    """
    # The lookbehind keeps the punctuation attached to its sentence.
    sentences = re.split(r'(?<=[.!?]) +', text)
    kept = sentences[:max(max_sentences, 0)]
    return ' '.join(kept)
def select_server() -> str:
    """Pick a random backend that differs from the previously picked one.

    Reads and updates the module-level ``last_picked`` so that two
    consecutive calls never return the same server. On the first call
    (``last_picked`` is ``None``) all three servers are eligible. The
    choice is also announced on stdout.

    Returns:
        The identifier of the chosen server.
    """
    global last_picked
    # Every server except the one used last turn is a candidate.
    candidates = [
        server for server in (server1, server2, server3)
        if server != last_picked
    ]
    choice = random.choice(candidates)
    last_picked = choice
    print(f"{GREEN}[chose server: {choice}]{RESET}")
    return choice
# Maps each backend identifier to (query function, display label, reply color).
speakers = {
    'ollama': (ollama, "TinyLlama", LIGHT_GRAY),
    'npu': (npu, "Qwen", LIGHT_CYAN),
    'llamacpp': (llamacpp, "Zephyr", LIGHT_YELLOW),
}

# Seed question; every reply becomes the prompt for the next speaker.
start = "What is the weirdest thing you know?"
print(f"Starting bias seed: {start}\n")

for _ in range(10):
    speaker = speakers.get(select_server())
    if speaker is None:
        # Unknown identifier: nothing to do this turn.
        continue
    ask, label, color = speaker
    # Shorten the reply, then wrap it for an 80-column terminal.
    start = textwrap.fill(trim_text(ask(start)), width=80)
    print(f"[{label}]: {color}{start}{RESET}\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment