@Norod
Created July 3, 2023 09:24

YamPeleg-HebrewGPT-GradioDemo.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"private_outputs": true,
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyODhKAaEIq+NIt0CY5PQBa0",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/Norod/eff16d46e494d9d3f02152259bbd55f0/yampeleg-hebrewgpt-gradiodemo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
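{
"cell_type": "markdown",
"metadata": {},
"source": [
"Gradio demo for Hebrew GPT text generation: install the dependencies, log in to the Hugging Face Hub, load one of the checkpoints (`YamPeleg/HebrewGPT`, `YamPeleg/HebrewGPT-instruct`, or `Norod78/hebrew-gpt_neo-xl`) through a `transformers` pipeline accelerated with DeepSpeed inference, run a quick generation sanity check, and launch a streaming right-to-left Gradio UI."
]
},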
{
"cell_type": "code",
"source": [
"!pip install transformers tokenizers deepspeed xformers bitsandbytes accelerate gradio huggingface_hub sentencepiece"
],
"metadata": {
"id": "8BKOA-W_Zndy"
},
"execution_count": null,
"outputs": []
},
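{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optional sanity sketch before loading anything heavy: confirm the GPU runtime is active and print the versions of the libraries the demo depends on. Nothing below is required by the rest of the notebook."
]
},
{
"cell_type": "code",
"source": [
"# Optional environment check: GPU availability and key library versions\n",
"import torch\n",
"import transformers\n",
"import deepspeed\n",
"import gradio\n",
"\n",
"print('torch:       ', torch.__version__)\n",
"print('transformers:', transformers.__version__)\n",
"print('deepspeed:   ', deepspeed.__version__)\n",
"print('gradio:      ', gradio.__version__)\n",
"print('CUDA available:', torch.cuda.is_available())"
],
"metadata": {},
"execution_count": null,
"outputs": []
},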
{
"cell_type": "code",
"source": [
"from huggingface_hub import login\n",
"login()"
],
"metadata": {
"id": "NKzPLSrnBbwc"
},
"execution_count": null,
"outputs": []
},
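{
"cell_type": "markdown",
"metadata": {},
"source": [
"`login()` above opens an interactive token prompt. As an alternative sketch, you can authenticate non-interactively from an environment variable; `HF_TOKEN` is an assumed variable name you would have to set yourself in the runtime."
]
},
{
"cell_type": "code",
"source": [
"# Optional alternative to the interactive prompt: log in with a token taken\n",
"# from an environment variable (HF_TOKEN is an assumed name, set it yourself)\n",
"import os\n",
"from huggingface_hub import login\n",
"\n",
"token = os.environ.get('HF_TOKEN')\n",
"if token:\n",
"    login(token=token)\n",
"else:\n",
"    print('HF_TOKEN is not set; use the interactive login() above instead.')"
],
"metadata": {},
"execution_count": null,
"outputs": []
},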
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cIBivOgKZY6h"
},
"outputs": [],
"source": [
"\n",
"import deepspeed\n",
"import torch\n",
"from transformers import pipeline\n",
"import os\n",
"\n",
"model_id = 'YamPeleg/HebrewGPT-instruct' #@param ['YamPeleg/HebrewGPT','YamPeleg/HebrewGPT-instruct', 'Norod78/hebrew-gpt_neo-xl']\n",
"\n",
"text_title = model_id.replace(\"/\", \" - \") + ' - Gradio Demo'\n",
"\n",
"should_use_fast = False\n",
"if 'Norod78' in model_id:\n",
" should_use_fast = True\n",
"\n",
"print(f'should_use_fast = {should_use_fast}')\n",
"\n",
"local_rank = int(os.getenv('LOCAL_RANK', '0'))\n",
"world_size = int(os.getenv('WORLD_SIZE', '1'))\n",
"generator = pipeline('text-generation', model=model_id,\n",
" tokenizer=model_id,\n",
" torch_dtype = torch.float16,\n",
" use_fast=should_use_fast,\n",
" device_map = \"auto\")\n",
"\n",
"if 'YamPeleg' in model_id:\n",
" generator.model.config.pad_token_id = generator.tokenizer.pad_token_id = 0\n",
" generator.model.config.bos_token_id = 1\n",
" generator.model.config.eos_token_id = 2\n",
"\n",
"\n",
"# setting device on GPU if available, else CPU\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"print('Using device:', device)\n",
"print()\n",
"\n",
"total_mem = 0\n",
"\n",
"#Additional Info when using cuda\n",
"if device.type == 'cuda':\n",
" print(torch.cuda.get_device_name(0))\n",
" print('Memory Usage:')\n",
" total_mem = round(torch.cuda.get_device_properties(0).total_memory/1024**3,1)\n",
" print('Total: ', total_mem, 'GB')\n",
" print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')\n",
" print('Cached: ', round(torch.cuda.memory_cached(0)/1024**3,1), 'GB')\n",
"\n",
"should_replace_with_kernel_inject = False\n",
"if (total_mem >= 20) or ('Norod78' in model_id):\n",
" should_replace_with_kernel_inject = True\n",
"\n",
"print(f'should_replace_with_kernel_inject = {should_replace_with_kernel_inject}')\n",
"\n",
"ds_engine = deepspeed.init_inference(generator.model,\n",
" mp_size=world_size,\n",
" dtype=torch.half,\n",
" replace_with_kernel_inject=should_replace_with_kernel_inject)\n",
"generator.model = ds_engine.module"
]
},
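{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optional follow-up sketch: confirm that the pipeline's model was swapped for the module returned by `deepspeed.init_inference`, that its weights are in half precision on the GPU, and that the special token ids set above took effect."
]
},
{
"cell_type": "code",
"source": [
"# Optional: inspect the module DeepSpeed handed back\n",
"print(type(generator.model))\n",
"param = next(generator.model.parameters())\n",
"print('dtype:', param.dtype, '| device:', param.device)\n",
"print('pad/bos/eos ids:',\n",
"      generator.model.config.pad_token_id,\n",
"      generator.model.config.bos_token_id,\n",
"      generator.model.config.eos_token_id)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},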
{
"cell_type": "code",
"source": [
"#Sanity, Just make sure we can generate\n",
"init_text = \"האיש האחרון עלי אדמות ישב לבד בחדרו, כשלפתע\"\n",
"\n",
"string = generator(init_text, do_sample=True, min_length=20, max_length=64, top_k=40, top_p=0.92, temperature=0.9)\n",
"if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:\n",
" print(string)"
],
"metadata": {
"id": "DWXB0Il-jQaB"
},
"execution_count": null,
"outputs": []
},
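{
"cell_type": "markdown",
"metadata": {},
"source": [
"For comparison, a small sketch of the same prompt with greedy decoding: `top_k`, `top_p` and `temperature` only take effect when `do_sample=True`, so turning sampling off yields the model's single most likely continuation."
]
},
{
"cell_type": "code",
"source": [
"# Optional: the same prompt decoded greedily, for comparison with the\n",
"# sampled output above\n",
"greedy = generator(init_text, do_sample=False, max_length=64)\n",
"if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:\n",
"    print(greedy)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},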
{
"cell_type": "code",
"source": [
"import gradio as gr\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer\n",
"from threading import Thread\n",
"import torch\n",
"\n",
"model = generator.model\n",
"tok = generator.tokenizer\n",
"\n",
"CUDA_AVAILABLE = torch.cuda.is_available()\n",
"device = torch.device(\"cuda\" if CUDA_AVAILABLE else \"cpu\")\n",
"\n",
"def generate(text = \"\"):\n",
" print(\"Create streamer\")\n",
" yield \"[אנא המתינו לתשובה]\"\n",
" streamer = TextIteratorStreamer(tok, timeout=5.)\n",
" if len(text) == 0:\n",
" text = \"\\n\"\n",
"\n",
" inputs = tok([text], return_tensors=\"pt\").to(device)\n",
" generation_kwargs = dict(inputs, streamer=streamer, do_sample=True, top_k=40, top_p=0.3, temperature=0.3, num_beams = 1 ,max_new_tokens=256, pad_token_id = model.config.eos_token_id, early_stopping=True, no_repeat_ngram_size=4)\n",
" thread = Thread(target=model.generate, kwargs=generation_kwargs)\n",
" thread.start()\n",
" generated_text = \"\"\n",
" for new_text in streamer:\n",
" yield generated_text + new_text\n",
" print(new_text, end =\"\")\n",
" generated_text += new_text\n",
" return generated_text\n",
"\n",
"demo = gr.Interface(\n",
" title=text_title,\n",
" fn=generate,\n",
" inputs=gr.Textbox(label=\"כתבו כאן את הטקסט שלכם או השאירו ריק\", elem_id=\"input_text\"),\n",
" outputs=gr.Textbox(type=\"text\", label=\"פה יופיע הטקסט שהמחולל יחולל\", elem_id=\"output_text\"),\n",
" css=\"#output_text{direction: rtl} #input_text{direction: rtl}\"\n",
")\n",
"\n",
"demo.queue()\n",
"demo.launch(debug=True)"
],
"metadata": {
"id": "FhSCepWx77Vu"
},
"execution_count": null,
"outputs": []
}
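,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The streaming helper can also be exercised without the UI. A minimal sketch: iterate over `generate()` and keep the last yielded value, which is the full cumulative text. Run this only after stopping the Gradio app above, since `launch(debug=True)` blocks the cell while the demo is up."
]
},
{
"cell_type": "code",
"source": [
"# Optional: drive the streaming generate() helper directly (no UI)\n",
"final_text = ''\n",
"for partial in generate(init_text):\n",
"    final_text = partial  # each yield is the cumulative text so far\n",
"print(final_text)"
],
"metadata": {},
"execution_count": null,
"outputs": []
}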
]
}