Skip to content

Instantly share code, notes, and snippets.

@VTSTech
Last active May 3, 2024 01:16
Show Gist options
  • Save VTSTech/bb858a2039fe84cd35af4064c0aa44d8 to your computer and use it in GitHub Desktop.
Save VTSTech/bb858a2039fe84cd35af4064c0aa44d8 to your computer and use it in GitHub Desktop.
VTSTech-GPT - Generate text with Cerebras GPT pretrained and Corianas finetuned models
# Program: VTSTech-GPT.py 2023-04-10 8:22:36PM
# Description: Python script that generates text with Cerebras GPT pretrained and Corianas finetuned models
# Author: Written by Veritas//VTSTech (veritas@vts-tech.org)
# GitHub: https://github.com/VTSTech
# Homepage: www.VTS-Tech.org
# Dependencies: transformers, colorama, Flask
# pip install transformers colorama flask
# Models are stored at C:\Users\%username%\.cache\huggingface\hub
import argparse
import time
import random
import warnings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, Conversation
from colorama import Fore, Back, Style, init
from flask import Flask, request
# --- Module configuration and command-line interface -----------------------
# (a module-level `global` statement is a no-op and has been removed)
init(autoreset=True)  # colorama: reset color/style after every print

build = "v0.3-r06"

# BUG FIX: the original did `tok = random.seed()` — random.seed() always
# returns None, so `eos_token_id` was silently None. We still seed the RNG,
# and keep both names (with their original value, None) so any external
# reader of these module attributes sees the same values as before.
random.seed()
tok = None
eos_token_id = tok  # NOTE(review): never used below — looks like it was meant to be a real token id; verify

# Defaults; may be overridden by -m/--model below (and by get_model()).
model_size = "111m"
model_name = "cerebras/Cerebras-GPT-111M"

parser = argparse.ArgumentParser(description='Generate text with Cerebras GPT models')
parser.add_argument('-m', '--model', choices=['111m', '256m', '590m', '1.3b', '2.7b', '6.7b', '13b'], help='Choose the model size to use (default: 111m)', type=str.lower)
parser.add_argument('-ce', '--cerb', action='store_true', help='Use Cerebras GPT pretrained models (default)')
parser.add_argument('-co', '--cori', action='store_true', help='Use Corianas finetuned models')
parser.add_argument('-nv', '--cuda', action='store_true', help='Use CUDA GPU')
parser.add_argument('-cv', '--conv', action='store_true', help='Conversation Mode')
parser.add_argument('-se', '--sent', action='store_true', help='Sentiment Mode')
parser.add_argument('-cu', '--custom', type=str, help='Specify a custom model')
parser.add_argument('-p', '--prompt', type=str, default="An intelligent AI describes Artificial Intelligence as", help='Text prompt to generate from')
parser.add_argument('-l', '--length', type=int, default=None, help="a value that controls the maximum number of tokens (words) that the model is allowed to generate")
parser.add_argument('-tk', '--topk', type=float, default=None, help="a value that controls the number of highest probability tokens to consider during generation")
parser.add_argument('-tp', '--topp', type=float, default=None, help="a value that controls the diversity of the generated text by only considering tokens with cumulative probability up to top_p")
parser.add_argument('-ty', '--typp', type=float, default=None, help="a value that controls the level of randomness in the generated text")
parser.add_argument('-tm', '--temp', type=float, default=None, help="a value that controls the degree of randomness in the generated text")
parser.add_argument('-ng', '--ngram', type=int, default=None, help=" a value that controls the degree to which repeated n-grams are penalized during generation")
parser.add_argument('-t', '--time', action='store_true', help='Print execution time')
parser.add_argument('-c', '--cmdline', action='store_true', help='cmdline mode, no webserver')
parser.add_argument('-cl', '--clean', action='store_true', help='Clean output')
parser.add_argument('-nw', '--nowarn', action='store_true', help='Suppress warnings')
args = parser.parse_args()

if args.clean or args.nowarn:
    warnings.simplefilter("ignore")
if args.model:
    model_size = args.model
if args.prompt:
    prompt_text = args.prompt

# --length is already an int (argparse type=int); the redundant int() cast
# and if/else from the original collapse to a conditional expression.
max_length = int(args.length) if args.length is not None else None
top_p = args.topp
top_k = args.topk
typ_p = args.typp
temp = args.temp
ngrams = args.ngram
def get_model():
    """Resolve and return the Hugging Face model id for the selected size.

    Consults the parsed CLI flags: Corianas finetuned models win over the
    Cerebras pretrained family, which is the default when running as the
    web server; an explicit --custom id is used only in cmdline mode with
    neither family flag set. Updates the module-level ``model_name``.
    """
    global model_size, model_name
    # Lookup tables instead of the original elif ladders; an unrecognized
    # size leaves model_name unchanged, exactly as before.
    corianas = {
        '111m': "Corianas/111m",
        '256m': "Corianas/256m",
        '590m': "Corianas/590m",
        '1.3b': "Corianas/1.3B",
        '2.7b': "Corianas/2.7B",
        '6.7b': "Corianas/6.7B",
        '13b': "Corianas/13B",
    }
    cerebras = {
        '111m': "cerebras/Cerebras-GPT-111M",
        '256m': "cerebras/Cerebras-GPT-256M",
        '590m': "cerebras/Cerebras-GPT-590M",
        '1.3b': "cerebras/Cerebras-GPT-1.3B",
        '2.7b': "cerebras/Cerebras-GPT-2.7B",
        '6.7b': "cerebras/Cerebras-GPT-6.7B",
        '13b': "cerebras/Cerebras-GPT-13B",
    }
    if args.cori:
        model_name = corianas.get(model_size, model_name)
    elif args.cerb or not args.cmdline:
        model_name = cerebras.get(model_size, model_name)
    elif args.custom:
        model_name = args.custom
    return model_name
# Resolve the model id once at import time so banner()/the web UI can show it.
model_name = get_model()
def banner():
    """Print the startup banner (version, model, prompt, generation params).

    Suppressed entirely when --clean output was requested.
    """
    if args.clean:
        return
    print(Style.BRIGHT + f"VTSTech-GPT {build} - www: VTS-Tech.org git: VTSTech discord.gg/P4RDD76U")
    print("Using Model : " + Fore.RED + f"{model_name}")
    print("Using Prompt: " + Fore.YELLOW + f"{prompt_text}")
    print("Using Params: " + Fore.YELLOW + f"max_new_tokens:{max_length} do_sample:True use_cache:True no_repeat_ngram_size:{ngrams} top_k:{top_k} top_p:{top_p} typical_p:{typ_p} temp:{temp}")
def CerbGPT(prompt_text):
    """Generate text (or a sentiment, or a chat reply) for *prompt_text*.

    Dispatches on the parsed CLI flags:
      --conv : interactive conversational loop (never returns; exits on "exit")
      --sent : sentiment-analysis over the prompt, returns the pipeline result
      else   : text-generation, returns the generated string

    Side effects: sets the module-level start_time/end_time and model_name.
    """
    global start_time, end_time, build, model_size, model_name
    # BUG FIX: the original reassigned temp/top_k/top_p to None here, which
    # shadowed the module-level values parsed from -tm/-tk/-tp and silently
    # ignored those CLI flags. They are now read from module scope, exactly
    # like typ_p/ngrams/max_length already were.
    start_time = time.time()
    model_name = get_model()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Load the model once and hand the object to pipeline(); the original
    # loaded it here and then made pipeline() re-load it by name.
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Only forward generation options the user actually supplied.
    opts = {}
    if temp is not None:
        opts["temperature"] = temp
    if top_k is not None:
        opts["top_k"] = top_k
    if top_p is not None:
        opts["top_p"] = top_p
    if typ_p is not None:
        opts["typical_p"] = typ_p
    if ngrams is not None:
        opts["no_repeat_ngram_size"] = ngrams
    if max_length is not None:
        opts["max_new_tokens"] = max_length

    if args.conv:
        # BUG FIX: the pipeline was rebuilt on every loop turn, and an
        # unreachable `break` followed exit(); build once, loop forever.
        chatbot = pipeline("conversational", model=model_name, device_map='auto')
        while True:
            prompt_text = input("You: ")
            if prompt_text == "exit":
                exit()
            # Each turn starts a fresh Conversation (no history), matching
            # the original behavior.
            conversation = chatbot(Conversation(prompt_text))
            response = conversation.generated_responses[-1]
            print("Bot:", response)

    if args.sent:
        pipe = pipeline("sentiment-analysis", model=model_name, tokenizer=tokenizer, device_map='auto')
        prompt_text = f"{prompt_text},{prompt_text}"
        generated_text = pipe(prompt_text)
        end_time = time.time()
        return generated_text
    else:
        # BUG FIX: the original built a CUDA pipeline for --cuda and then
        # unconditionally overwrote it with a device_map='auto' one; honor
        # the flag here instead.
        if args.cuda:
            pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
        else:
            pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map='auto')
        generated_text = pipe(prompt_text, do_sample=True, use_cache=True, **opts)[0]
        end_time = time.time()
        return generated_text['generated_text']
if not args.cmdline:
    app = Flask(__name__)

    def _query_num(cast, names, fallback):
        """Return the first non-empty query param in *names* cast via *cast*.

        Falls back to *fallback* (the current global) when absent or invalid,
        so CLI-supplied defaults survive requests that omit a parameter.
        """
        for name in names:
            value = request.args.get(name)
            if value not in (None, ''):
                try:
                    return cast(value)
                except ValueError:
                    pass
        return fallback

    @app.route('/', methods=['GET'])
    def index():
        """Serve the landing page with usage links for /generate."""
        return f"""<html><body><head><title>VTSTech-GPT {build}</title></head><p><a href='http://localhost:5000/'>VTSTech-GPT</a> <a href='https://gist.github.com/Veritas83/bb858a2039fe84cd35af4064c0aa44d8'>{build}</a> - <a href=https://www.VTS-Tech.org>www.VTS-Tech.org</a> <a href=https://github.com/Veritas83>github.com/Veritas83</a><br><br>ie: <a href=http://localhost:5000/generate?model=256m&prompt=AI%20is>Prompt: AI is</a><br><br>Click on the link above to visit /generate with the prompt: AI is. Change the prompt= parameter in the address bar to use your own prompts<br><br>
Other supported URL params:
<ul>
<li>model_size: <a href="http://localhost:5000/generate?model=111m">111m</a>, <a href="http://localhost:5000/generate?model=256m">256m</a>, <a href="http://localhost:5000/generate?model=590m">590m</a>, <a href="http://localhost:5000/generate?model=1.3b">1.3b</a>, <a href="http://localhost:5000/generate?model=2.7b">2.7b</a>, <a href="http://localhost:5000/generate?model=6.7b">6.7b</a>, <a href="http://localhost:5000/generate?model=13b">13b</a> (size of model in params)</li>
<li>top_p: <a href="http://localhost:5000/generate?top_p=0.1">0.1</a>, 1.0</li>
<li>top_k: <a href="http://localhost:5000/generate?top_k=0.1">0.1</a>, 50</li>
<li>temp: <a href="http://localhost:5000/generate?temp=0.1">0.1</a>, 1.0</li>
<li>size: <a href="http://localhost:5000/generate?size=20">20</a>, <a href="http://localhost:5000/generate?size=256">256</a>, <a href="http://localhost:5000/generate?size=1024">1024</a> (length of generated output)</li>
</ul>
</body></html>"""

    @app.route('/generate', methods=['GET'])
    def generate():
        """Run generation for the request's prompt and return an HTML page.

        BUG FIX: the original assigned the query parameters to *locals*, so
        they never reached CerbGPT (which reads module globals), and it read
        names ('topp', 'topk', ...) that did not match the links the index
        page advertises ('top_p', 'top_k', ...). We now update the globals,
        accept both spellings, and cast the string values to float/int.
        """
        global model_size, top_p, top_k, typ_p, temp, max_length, prompt_text
        model_size = request.args.get('model', '111m')
        top_p = _query_num(float, ('topp', 'top_p'), top_p)
        top_k = _query_num(float, ('topk', 'top_k'), top_k)
        typ_p = _query_num(float, ('typp', 'typ_p'), typ_p)
        temp = _query_num(float, ('temp',), temp)
        max_length = _query_num(int, ('size',), max_length)
        prompt_text = request.args.get('prompt', 'AI is')
        model_name = get_model()
        generated_text = CerbGPT(prompt_text)
        generated_text = f"<html><body><head><title>VTSTech-GPT {build}</title></head><p><a href='http://localhost:5000/'>VTSTech-GPT</a> <a href='https://gist.github.com/Veritas83/bb858a2039fe84cd35af4064c0aa44d8'>{build}</a> - <a href=https://www.VTS-Tech.org>www.VTS-Tech.org</a> <a href=https://github.com/Veritas83>github.com/Veritas83</a><br><br>Using Model : <b><a href=https://huggingface.co/{model_name}>{model_name}</a></b><br>Using Prompt: <i>{prompt_text}</i><br>Using Params: max_new_tokens:{max_length} do_sample:True use_cache:True no_repeat_ngram_size:2 top_k:{top_k} top_p:{top_p} typical_p:{typ_p} temp:{temp}<br><br>" + generated_text + f"<br><br>Execution time: {end_time - start_time:.2f} seconds</p></body></html>"
        return generated_text
if __name__ == '__main__':
    # (the original's module-level `global start_time, end_time` was a no-op
    # and is omitted)
    if args.cmdline:
        # Command-line mode: one-shot generation to stdout.
        banner()
        print(CerbGPT(prompt_text))
        if args.time and not args.clean:
            print(Style.BRIGHT + Fore.RED + f"Script finished. Execution time: {end_time - start_time:.2f} seconds")
    else:
        # Web mode: serve the Flask app on all interfaces.
        app.run(host='0.0.0.0', port=5000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment