# koalp.py -- loads beomi/KoAlpaca-Polyglot-12.8B and exposes ask()
import torch
from transformers import pipeline, AutoModelForCausalLM

MODEL = 'beomi/KoAlpaca-Polyglot-12.8B'

# Prefer CUDA, fall back to Apple MPS, then CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
elif torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device("cpu")

# Load the 12.8B model in half precision to roughly halve its memory footprint.
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
).to(device=DEVICE, non_blocking=True)
model.eval()

pipe = pipeline(
    'text-generation',
    model=model,
    tokenizer=MODEL,
    device=DEVICE,
)
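
# Optional sanity check (a sketch, not part of the original gist): confirm the
# weights actually landed on the intended device in half precision.
_param = next(model.parameters())
print(_param.device, _param.dtype)  # e.g. cuda:0 torch.float16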
def ask(x, context='', is_input_full=False):  # is_input_full is accepted but unused
    # KoAlpaca's prompt template (Korean): 질문 = question, 맥락 = context, 답변 = answer.
    prompt = (
        f"### 질문: {x}\n\n### 맥락: {context}\n\n### 답변:"
        if context
        else f"### 질문: {x}\n\n### 답변:"
    )
    ans = pipe(
        prompt,
        do_sample=True,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        return_full_text=False,  # return only the generated answer, not the prompt
        eos_token_id=2,          # stop on token id 2, hard-coded for this model's tokenizer
        pad_token_id=2,          # pad with the same id to silence the missing-pad warning
    )
    print(ans[0]['generated_text'])
    return ans[0]['generated_text']
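
For illustration, ask() also accepts background text through the context argument, which the CLI below does not exercise. A hypothetical call from another script might look like this (the Korean strings are only sample inputs, and sampled output will vary):

# example: pair a question with background text via the context parameter
import koalp

answer = koalp.ask(
    "이 글의 요점은 뭐야?",  # "What is the main point of this text?"
    context="KoAlpaca는 Polyglot-ko를 한국어 지시 데이터로 파인튜닝한 모델이다.",
    # context: "KoAlpaca is Polyglot-ko fine-tuned on Korean instruction data."
)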
# Interactive CLI (separate script): read questions from stdin in a loop.
import koalp

while True:
    koalp.ask(input("> "))
# Flask API server (separate script): wraps koalp.ask() behind a POST endpoint.
import koalp
from flask import Flask, request, jsonify

app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False  # return Korean text verbatim instead of \u escapes

@app.route("/ask", methods=['POST'])
def requestAsk():
    params = request.get_json()
    askContext = params['ask']
    askResult = koalp.ask(askContext)
    response = {
        "ask": askContext,
        "result": askResult,
    }
    print(askContext)
    return jsonify(response)

if __name__ == "__main__":
    app.run(debug=False, host='0.0.0.0', port=8080)
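
For reference, a minimal client for this endpoint, assuming the server is reachable at localhost:8080 (the requests package is an extra dependency, not used by the gist itself):

# client example: POST a question to /ask and print the answer
import requests

resp = requests.post(
    "http://localhost:8080/ask",
    json={"ask": "딥러닝이 뭐야?"},  # "What is deep learning?"
)
resp.raise_for_status()
print(resp.json()["result"])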