Skip to content

Instantly share code, notes, and snippets.

@DylanCodeCabin
Created Oct 12, 2021
Embed
What would you like to do?
import flask
from flask import request, jsonify
from transformers import GPTNeoForCausalLM, AutoTokenizer
import deepspeed
# NOTE(review): everything below runs at import time, so the process will not
# accept requests until the model has finished downloading/loading onto the GPU.
print("Starting Server")
print("Loading EleutherAI")
# Load GPT-Neo 2.7B in half precision and move it to the GPU.
# Requires a CUDA device; presumably several GB of GPU memory in fp16 -- TODO confirm.
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B").half().to("cuda")
# Matching tokenizer for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
print("EleutherAI Ready")
print("Creating Flask")
# Flask application object; the route handlers below are registered on it.
app = flask.Flask(__name__)
@app.route('/', methods=['GET'])
def home():
    """Landing route: describe the service and list its available endpoints."""
    routes = {
        "/": "Welcome screen",
        "/compose": "Generate text from input (secure)",
    }
    return {"message": "Welcome to the GPT Instance", "routes": routes}
@app.route('/compose', methods=['GET'])
def compose():
    """Generate text from the ``text`` query parameter with GPT-Neo.

    Query parameters:
        text: prompt string (default "").
        o:    number of new tokens to generate beyond the prompt (default 400).
        t:    sampling temperature (default 0.9).
        n:    number of independent generations to return (default 1).

    Returns a JSON object ``{"input": ..., "output": [...]}`` on success, or
    ``{"error": ...}`` with HTTP 400 when a numeric parameter is malformed
    (previously this raised and produced an opaque 500).
    """
    text = request.args.get('text', '')
    try:
        output_length = int(request.args.get('o', '400'))
        temp_value = float(request.args.get('t', '0.9'))
        iterations = int(request.args.get('n', '1'))
    except ValueError:
        return {"error": "parameters 'o' and 'n' must be integers, 't' a float"}, 400
    # You would want some form of authentication here
    ids = tokenizer(text, return_tensors="pt").input_ids.to("cuda")
    # min_length == max_length forces exactly `output_length` new tokens.
    max_length = output_length + ids.shape[1]
    output_list = []
    for _ in range(iterations):
        gen_tokens = model.generate(
            ids,
            do_sample=True,
            min_length=max_length,
            max_length=max_length,
            temperature=temp_value,
            use_cache=True,
        )
        # batch_decode returns one string per sequence; batch size is 1 here.
        output_list.append(tokenizer.batch_decode(gen_tokens)[0])
    return {"input": text, "output": output_list}
print("Server Ready")

# Fix: the original script built the app but never started it, so running the
# file directly would print "Server Ready" and immediately exit.  The guard is
# backward-compatible with `flask run`-style deployment (it is skipped there).
# NOTE(review): host/port not specified anywhere in the file -- 0.0.0.0:5000
# assumed; confirm against the intended deployment.  Use a production WSGI
# server rather than the Flask dev server for real traffic.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment