Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@DylanCodeCabin
Created October 12, 2021 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DylanCodeCabin/dde1ed8968c0ddfb5f685455ea22d82f to your computer and use it in GitHub Desktop.
import flask
from flask import request, jsonify
from transformers import GPTNeoForCausalLM, AutoTokenizer
import deepspeed
print("Starting Server")
print("Loading EleutherAI")
# Load GPT-Neo 2.7B once at import time in fp16 and move it to the GPU.
# NOTE(review): .half().to("cuda") assumes a CUDA device with enough VRAM
# (~6 GB for the fp16 weights) is available — this line fails on CPU-only hosts.
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B").half().to("cuda")
# Matching tokenizer for the same checkpoint; kept on CPU (tokenization is CPU work).
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
print("EleutherAI Ready")
print("Creating Flask")
# Flask application object that the route decorators below attach to.
app = flask.Flask(__name__)
@app.route('/', methods=['GET'])
def home():
    """Root endpoint: return a small JSON index of the available routes.

    Returns:
        dict: a welcome message plus a map of route -> description
        (Flask serializes the dict to a JSON response automatically).
    """
    # Fix: the pasted source had this body flattened to column 0, which is a
    # SyntaxError in Python; the statements are restored to proper indentation.
    return {
        "message": "Welcome to the GPT Instance",
        "routes": {
            "/": "Welcome screen",
            "/compose": "Generate text from input (secure)",
        },
    }
@app.route('/compose', methods=['GET'])
def compose():
    """Generate text from the prompt in the `text` query parameter.

    Query parameters:
        text: prompt string (default "").
        o:    number of new tokens to generate (int, default 400).
        t:    sampling temperature (float, default 0.9).
        n:    number of independent samples to generate (int, default 1).

    Returns:
        dict: {"input": prompt, "output": [generated_text, ...]}, or a
        ({"error": ...}, 400) tuple when a numeric parameter is malformed.
    """
    text = request.args.get('text', '')
    try:
        output_length = int(request.args.get('o', '400'))
        temp_value = float(request.args.get('t', '0.9'))
        iterations = int(request.args.get('n', '1'))
    except ValueError:
        # Fix: previously a malformed 'o'/'t'/'n' raised ValueError and the
        # client got an unhandled 500; report it as a client error instead.
        return {"error": "'o' and 'n' must be integers and 't' a float"}, 400
    # NOTE(review): anyone who can reach this port can run GPU generations —
    # some form of authentication belongs here (original TODO preserved).
    ids = tokenizer(text, return_tensors="pt").input_ids.to("cuda")
    # Total sequence length = prompt tokens + requested new tokens; setting
    # min_length == max_length forces exactly output_length new tokens.
    max_length = output_length + ids.shape[1]
    output_list = []
    for _ in range(iterations):
        gen_tokens = model.generate(
            ids,
            do_sample=True,
            min_length=max_length,
            max_length=max_length,
            temperature=temp_value,
            use_cache=True,
        )
        # Batch size is 1, so [0] takes the single decoded sequence.
        output_list.append(tokenizer.batch_decode(gen_tokens)[0])
    return {"input": text, "output": output_list}
print("Server Ready")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment