Skip to content

Instantly share code, notes, and snippets.

@yuhanz
Last active April 17, 2024 05:36
Terminal Chat Bot.
### Download files from https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0/tree/main
import time
from transformers import AutoTokenizer
import transformers
import torch
# Model location: "./" expects the TinyLlama checkpoint files to be downloaded
# into the current directory (see the link at the top of the file).
# Alternative hub checkpoints kept for reference:
#   PY007/TinyLlama-1.1B-step-50K-105b
#   yuhanzgithub/tinyllama
model = "./"

tokenizer = AutoTokenizer.from_pretrained(model)

# Conversational pipeline in half precision on the Apple-silicon GPU ("mps").
# For CPU-only machines, use torch_dtype=torch.float32 / device_map="cpu".
pipeline = transformers.pipeline(
    "conversational",
    model=model,
    torch_dtype=torch.float16,
    device_map="mps",
)
# Warm-up / template conversation showing the chat-message schema.
# NOTE(review): the interactive loop below rebuilds `messages` for every user
# turn, so this initial list is never actually sent to the model; it is kept
# only as a format reference. The previous `encoded_text =
# tokenizer.apply_chat_template(messages, tokenize=False)` line was removed
# because its result was never used anywhere in the script.
messages = [
    {
        "role": "system",
        "content": "You are a expert chatbot who tries provide the right answer",
    },
    {"role": "user", "content": "how are you?"},
]
# Interactive REPL: read a question from stdin, run it through the
# conversational pipeline, and print the raw result with wall-clock timing.
# Type "exit" at the prompt to quit.
#
# The system message is loop-invariant, so it is built once here instead of
# being re-created on every iteration as before.
system_message = {
    "role": "system",
    "content": "You are a expert chatbot who tries provide the right answer",
}
while True:
    print('Q: ')
    input1 = input()
    if input1 == 'exit':
        # `break` ends the script the same way the previous `exit()` did,
        # without raising SystemExit.
        break
    messages = [system_message, {"role": "user", "content": input1}]
    print('Running...')
    start_time = time.time()
    sequences = pipeline(
        messages,
        do_sample=True,           # sample rather than greedy-decode
        top_k=10,                 # restrict sampling to the 10 likeliest tokens
        num_return_sequences=1,
        repetition_penalty=1.5,   # >1 discourages repeated tokens
        eos_token_id=tokenizer.eos_token_id,
        max_length=100,           # hard cap on total tokens (prompt + reply)
    )
    end_time = time.time()
    elapsed_time = end_time - start_time
    print("Elapsed time:", elapsed_time, "seconds")
    print("sequences:", sequences)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment