@billju
Created September 23, 2023 08:31
llama-cpp-python test — an interactive Q&A loop over a local GGML model
from glob import glob

from llama_cpp import Llama

# Offload 50 layers to the GPU if requested; otherwise run on 4 CPU threads.
n_gpu_layers = 50 if input('Use GPU? (Y/n) ').upper() == 'Y' else 0
n_threads = None if n_gpu_layers else 4

# List the GGML model files in the current directory and let the user pick one.
ggmls = glob('*ggml*.bin')
for i, ggml in enumerate(ggmls):
    print(i, ggml)
ggml = ggmls[int(input('Select a model: '))]

llm = Llama(ggml, n_gpu_layers=n_gpu_layers, n_threads=n_threads, verbose=False)

# Simple chat loop: wrap the user input in a Q/A template and stream the
# completion token by token, stopping when the model starts a new question.
while True:
    prompt = 'Q:{Q}?\nA:'.format(Q=input('\nyou>'))
    for output in llm(prompt, stop=['Q:', '###'], stream=True):
        print(output['choices'][0]['text'], end='', flush=True)
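
For reference, a minimal non-streaming sketch of the same call, to show the response structure the loop above consumes chunk by chunk. The prompt text and max_tokens value here are illustrative, not from the original script.

# A minimal sketch, assuming `llm` was created as above. With stream=False,
# llm(...) returns a single completed response dict whose
# ['choices'][0]['text'] field holds the whole answer, matching the shape of
# the per-chunk dicts yielded when stream=True.
out = llm('Q:What is llama.cpp?\nA:', max_tokens=64, stop=['Q:', '###'])
print(out['choices'][0]['text'])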