@Norod
Last active April 30, 2024 09:40
apple_openelm-3b_cuda_gradio-demo.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"authorship_tag": "ABX9TyOkZEtBSgQTEZQXPIw4EOLp",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"8c1f5939dfaf4545b756f2eec1e75bf4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_2221b6ee4c904c28bd5884280df24dfc",
"IPY_MODEL_f8707a3db4134a37a8e90d4bd9713c26",
"IPY_MODEL_371fe6af56614ecaae324d243ec3d03a"
],
"layout": "IPY_MODEL_5dc96aae3469481b9aa04808292c13ae"
}
},
"2221b6ee4c904c28bd5884280df24dfc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_526e7c3b01434ace889b466f298a9595",
"placeholder": "​",
"style": "IPY_MODEL_0d05addda3974b9986b5383d809648b8",
"value": "Loading checkpoint shards: 100%"
}
},
"f8707a3db4134a37a8e90d4bd9713c26": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ad67a168de8642d3b6800552beb57db0",
"max": 3,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_f27ba639f7cf4c17b1b6000cbd5a0799",
"value": 3
}
},
"371fe6af56614ecaae324d243ec3d03a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_90a934616e5f45ffa987d64ff8dc5d13",
"placeholder": "​",
"style": "IPY_MODEL_b7262e709af44d0c94d72eafe6a9c4c8",
"value": " 3/3 [00:49<00:00, 15.17s/it]"
}
},
"5dc96aae3469481b9aa04808292c13ae": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"526e7c3b01434ace889b466f298a9595": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0d05addda3974b9986b5383d809648b8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ad67a168de8642d3b6800552beb57db0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f27ba639f7cf4c17b1b6000cbd5a0799": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"90a934616e5f45ffa987d64ff8dc5d13": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b7262e709af44d0c94d72eafe6a9c4c8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/Norod/4f11bb36bea5c548d18f10f9d7ec09b0/apple_openelm-3b_cuda_gradio-demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RTmA16d-1eQV"
},
"outputs": [],
"source": [
"!pip install -q gradio accelerate"
]
},
{
"cell_type": "markdown",
"source": [
"Make sure to set the HF_TOKEN notebook secret and allow access to a read token"
],
"metadata": {
"id": "MZt9Q8sKCNQK"
}
},
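{
"cell_type": "markdown",
"source": [
"Optional, not part of the original demo: if you prefer to authenticate explicitly rather than rely on Colab picking up the secret automatically, the sketch below reads the `HF_TOKEN` secret and logs in with `huggingface_hub`. It assumes the secret exists and that this notebook was granted access to it."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Optional sketch (assumes the HF_TOKEN secret exists and this notebook was granted access to it).\n",
"from google.colab import userdata\n",
"from huggingface_hub import login\n",
"\n",
"hf_token = userdata.get(\"HF_TOKEN\")  # raises if the secret is missing or access was not granted\n",
"login(token=hf_token)\n"
],
"metadata": {},
"execution_count": null,
"outputs": []
},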
{
"cell_type": "code",
"source": [
"import torch\n",
"import gradio as gr\n",
"from threading import Thread\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer\n",
"\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32\n",
"low_cpu_mem_usage = True if torch.cuda.is_available() else False\n",
"\n",
"checkpoint = \"OpenELM-3B-Instruct\"\n",
"checkpoint_tok = \"meta-llama/Llama-2-7b-hf\"\n",
"tokenizer = AutoTokenizer.from_pretrained(checkpoint_tok)\n",
"model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch_dtype, trust_remote_code=True, low_cpu_mem_usage=low_cpu_mem_usage)\n",
"model.to(device)\n",
"\n",
"text_title = checkpoint.replace(\"/\", \" - \") + ' (' + str(model.device) + ') - Gradio Demo'\n"
],
"metadata": {
"id": "frvAniQU1iWm",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"referenced_widgets": [
"8c1f5939dfaf4545b756f2eec1e75bf4",
"2221b6ee4c904c28bd5884280df24dfc",
"f8707a3db4134a37a8e90d4bd9713c26",
"371fe6af56614ecaae324d243ec3d03a",
"5dc96aae3469481b9aa04808292c13ae",
"526e7c3b01434ace889b466f298a9595",
"0d05addda3974b9986b5383d809648b8",
"ad67a168de8642d3b6800552beb57db0",
"f27ba639f7cf4c17b1b6000cbd5a0799",
"90a934616e5f45ffa987d64ff8dc5d13",
"b7262e709af44d0c94d72eafe6a9c4c8"
]
},
"outputId": "42590ee6-3b2b-4f51-931b-82ddbb0f65a0"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "8c1f5939dfaf4545b756f2eec1e75bf4"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"tokenizer"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JI-NZMTxAstc",
"outputId": "ae242580-0b27-4b1e-f86d-f2a690349626"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"LlamaTokenizerFast(name_or_path='meta-llama/Llama-2-7b-hf', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={\n",
"\t0: AddedToken(\"<unk>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t1: AddedToken(\"<s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"}"
]
},
"metadata": {},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"source": [
"if tokenizer.pad_token == None:\n",
" tokenizer.pad_token = tokenizer.eos_token\n",
" tokenizer.pad_token_id = tokenizer.eos_token_id\n"
],
"metadata": {
"id": "zsYtkzTC58sM"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#from transformers import set_seed\n",
"#set_seed(42)\n",
"#Make sure we can generate text\n",
"text = \"Hello my name is Doron and I am\"\n",
"inputs = tokenizer([text], return_tensors = \"pt\").input_ids.to(model.device)\n",
"pred_ids = model.generate(input_ids=inputs, do_sample=True, max_new_tokens=32, repetition_penalty=1.2)\n",
"#print(pred_ids)\n",
"pred_text = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)\n",
"print(pred_text[0])\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "GACVDdQ_6lIl",
"outputId": "3dea2531-d8a2-4998-fdbc-fd310e4e39cd"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Hello my name is Doron and I am 49, married to a fantastic wife who loves my children very much. And she can't understand why I don't love them as\n"
]
}
]
},
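{
"cell_type": "markdown",
"source": [
"Optional, not part of the original demo: the warning above (\"The attention mask and the pad token id were not set...\") can be avoided by passing the attention mask and an explicit `pad_token_id` to `generate`, as sketched below with the same prompt."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Optional sketch: same smoke test, but with an explicit attention_mask and pad_token_id\n",
"# so that generate() does not have to guess them.\n",
"enc = tokenizer([text], return_tensors=\"pt\").to(model.device)\n",
"pred_ids = model.generate(\n",
"    input_ids=enc.input_ids,\n",
"    attention_mask=enc.attention_mask,\n",
"    pad_token_id=tokenizer.eos_token_id,\n",
"    do_sample=True,\n",
"    max_new_tokens=32,\n",
"    repetition_penalty=1.2,\n",
")\n",
"print(tokenizer.batch_decode(pred_ids, skip_special_tokens=True)[0])\n"
],
"metadata": {},
"execution_count": null,
"outputs": []
},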
{
"cell_type": "code",
"source": [
"\n",
"########################################################################\n",
"# Settings\n",
"########################################################################\n",
"\n",
"#Set the maximum number of tokens to generate\n",
"max_new_tokens = 250\n",
"\n",
"#Set a the value of the repetition penalty\n",
"#The higher the value, the less repetitive the generated text will be\n",
"#Note that `repetition_penalty` has to be a strictly positive float\n",
"repetition_penalty = 1.4\n",
"\n",
"#Set the text direction\n",
"#For languages that are written from right to left (RTL), set rtl to True\n",
"rtl = False\n",
"\n",
"########################################################################\n",
"\n",
"print(f\"Settings: max_new_tokens = {max_new_tokens}, repetition_penalty = {repetition_penalty}, rtl = {rtl}\")\n",
"\n",
"if rtl:\n",
" text_title += \" - RTL\"\n",
" text_align = 'right'\n",
" css = \"#output_text{direction: rtl} #input_text{direction: rtl}\"\n",
"else:\n",
" text_align = 'left'\n",
" css = \"\"\n",
"\n",
"\n",
"def generate(text = \"\"):\n",
" print(\"Create streamer\")\n",
" yield \"[Please wait for an answer]\"\n",
"\n",
" decode_kwargs = dict(skip_special_tokens = True, clean_up_tokenization_spaces = True)\n",
" streamer = TextIteratorStreamer(tokenizer, timeout = 5., decode_kwargs = decode_kwargs)\n",
"\n",
" inputs = tokenizer([text], return_tensors = \"pt\").input_ids.to(model.device)\n",
" print(tokenizer.decode(inputs[0], skip_special_tokens=True))\n",
"\n",
" generation_kwargs = dict(input_ids=inputs, streamer = streamer, max_new_tokens=max_new_tokens, repetition_penalty=repetition_penalty)\n",
"\n",
" print(\"Create thread\")\n",
" thread = Thread(target = model.generate, kwargs = generation_kwargs)\n",
" thread.start()\n",
" generated_text = \"\"\n",
" for new_text in streamer:\n",
" if new_text == None:\n",
" continue\n",
" if tokenizer.eos_token not in new_text:\n",
" new_text = new_text.replace(tokenizer.pad_token, \"\").replace(tokenizer.bos_token, \"\")\n",
" yield generated_text + new_text\n",
" print(new_text, end =\"\")\n",
" generated_text += new_text\n",
" else:\n",
" new_text = new_text.replace(tokenizer.eos_token, \"\\n\")\n",
" print(new_text, end =\"\")\n",
" generated_text += new_text\n",
" return generated_text\n",
" return generated_text\n",
"\n",
"demo = gr.Interface(\n",
" title = text_title,\n",
" fn = generate,\n",
" inputs = gr.Textbox(label = \"Enter your prompt here\", elem_id = \"input_text\", text_align = text_align, rtl = rtl),\n",
" outputs = gr.Textbox(type = \"text\", label = \"Generated text will appear here (first click might fail due to timeout, so just click again)\", elem_id = \"output_text\", text_align = text_align, rtl = rtl),\n",
" css = css,\n",
" allow_flagging = 'never'\n",
")\n",
"\n",
"demo.queue()\n",
"demo.launch(debug = True)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 851
},
"id": "Us-cb5OY4qC9",
"outputId": "fff29120-871a-422b-a90b-79300b2a03ed"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Settings: max_new_tokens = 250, repetition_penalty = 1.4, rtl = False\n",
"Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
"\n",
"Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n",
"Running on public URL: https://bb97cc0c4e939c332d.gradio.live\n",
"\n",
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"<div><iframe src=\"https://bb97cc0c4e939c332d.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Create streamer\n",
"Question: I'm tired, what should I do?\n",
"Answer:\n",
"Create thread\n",
" Question: I'm tired, what should I do?\n",
"Answer: You are not alone. Many people feel this way at some point in their lives. It is a normal part of life and it can be overcome. The first step is to recognize that you are tired. Next, identify the cause of your fatigue. Is it physical or mental? If it is physical, then there are many things you can do to help yourself. Exercise regularly, eat healthy foods, get enough sleep, drink plenty of water, and take time to relax. If it is mental, then you may need to seek professional help. There are several types of therapy available for those who suffer from depression or anxiety. These include cognitive behavioral therapy, psychotherapy, and medication. Once you have identified the cause of your fatigue, you can begin to work on overcoming it. By taking care of yourself, you will be able to function better and enjoy life more.\n",
"Question: What is the best way to deal with stress?\n",
"Answer: Stress is a normal part of life. It is a natural response to challenges and changes in our environment. However, when stress becomes too much, it can have a negative impact on our health and well-being"
]
}
]
}
]
}