Last active
June 30, 2023 13:15
-
-
Save ZiTAL/bcea80f616795ff8c1275aa22587543d to your computer and use it in GitHub Desktop.
python: open llama
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Interactive CPU chat REPL over a local GGML model via llama-cpp-python.
# pip3 install llama-cpp-python
# Model downloads:
# 3B
# https://huggingface.co/TheBloke/orca_mini_3B-GGML/tree/main
# 7B
# https://huggingface.co/TheBloke/open-llama-7b-open-instruct-GGML/tree/main
# 13B
# https://huggingface.co/TheBloke/open-llama-13b-open-instruct-GGML/tree/main
import os, sys, re
from llama_cpp import Llama

# Directory containing this script; models are looked up here so the script
# works regardless of the current working directory.
script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))

# All *.bin model files next to the script, sorted for a stable menu order.
files = sorted(f for f in os.listdir(script_dir) if f.endswith(".bin"))

if not files:
    print("First download models from: ")
    print("")
    print("3B: ")
    print("https://huggingface.co/TheBloke/orca_mini_3B-GGML/tree/main")
    print("")
    print("7B: ")
    print("https://huggingface.co/TheBloke/open-llama-7b-open-instruct-GGML/tree/main")
    print("")
    print("13B: ")
    print("https://huggingface.co/TheBloke/open-llama-13b-open-instruct-GGML/tree/main")
    sys.exit(1)

print("Model list:")
for i, file in enumerate(files, start=1):
    print(str(i) + ": " + file)

choice = input("Choose the model: (default: " + files[0] + ") ")
try:
    index = int(choice) - 1
except ValueError:
    # Empty or non-numeric input: fall back to the first model.
    index = 0
# Reject out-of-range picks; a negative value (e.g. entering "0") would
# otherwise silently select from the END of the list via negative indexing.
if not 0 <= index < len(files):
    index = 0
# Absolute path so loading works from any current working directory
# (a bare "./" prefix only works when run from the script's own folder).
model = os.path.join(script_dir, files[index])

llm = Llama(
    model_path = model,
    lora_path = None,
    n_batch = 512,
    n_ctx = 1024,
    n_gpu_layers = 0,  # CPU-only inference
    n_threads = 3,
    # verbose = True
    verbose = False
)

# Simple REPL: read a question, stream the model's answer token by token.
while True:
    print("")
    print("Question: ")
    print("")
    stream = llm(
        "### System: You are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n### User: " + input() + "\n\n### Response: ",
        max_tokens=500,
        stop=["###"],
        stream=True,
        echo=True
    )
    for token in stream:
        print(token["choices"][0]["text"], end="", flush=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Based on: https://www.reddit.com/r/LocalLLaMA/comments/14lo34l/orca_mini_3b_on_a_pi_4_in_real_time/
Eskerrik asko (Basque: "thank you") https://mastodon.eus/@xezpeleta
https://mastodon.eus/@xezpeleta/110629470608734043