Muqsit/phi-3-mini-4k-cotton-textile.ipynb Secret

## phi-3-mini-4k-cotton-textile.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyPjFwSvQV820pP66KNm+79A",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/Muqsit/006f3339879b1c9b21ad4115c64397af/phi-3-mini-4k-cotton-textile.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install llama-cpp-python\n",
        "!wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "BOsc3GwUq8DJ",
        "outputId": "154ce46f-bc93-4e15-de6f-a46586e527e6"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting llama-cpp-python\n",
            "  Downloading llama_cpp_python-0.2.82.tar.gz (50.7 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.7/50.7 MB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (4.12.2)\n",
            "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (1.25.2)\n",
            "Collecting diskcache>=5.6.1 (from llama-cpp-python)\n",
            "  Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (3.1.4)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.11.3->llama-cpp-python) (2.1.5)\n",
            "Building wheels for collected packages: llama-cpp-python\n",
            "  Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for llama-cpp-python: filename=llama_cpp_python-0.2.82-cp310-cp310-linux_x86_64.whl size=2815266 sha256=f19884624496a853b77eac1a98369b1cd1248c4b800e14efc81c12c1f669e687\n",
            "  Stored in directory: /root/.cache/pip/wheels/5f/da/5a/272c969ba31c678e6bee2543f3be4dfa67d0feb36ed6f94c01\n",
            "Successfully built llama-cpp-python\n",
            "Installing collected packages: diskcache, llama-cpp-python\n",
            "Successfully installed diskcache-5.6.3 llama-cpp-python-0.2.82\n",
            "--2024-07-21 09:46:46--  https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf\n",
            "Resolving huggingface.co (huggingface.co)... 18.154.227.7, 18.154.227.87, 18.154.227.69, ...\n",
            "Connecting to huggingface.co (huggingface.co)|18.154.227.7|:443... connected.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://cdn-lfs-us-1.huggingface.co/repos/41/c8/41c860f65b01de5dc4c68b00d84cead799d3e7c48e38ee749f4c6057776e2e9e/8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27Phi-3-mini-4k-instruct-q4.gguf%3B+filename%3D%22Phi-3-mini-4k-instruct-q4.gguf%22%3B&Expires=1721813434&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTgxMzQzNH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzQxL2M4LzQxYzg2MGY2NWIwMWRlNWRjNGM2OGIwMGQ4NGNlYWQ3OTlkM2U3YzQ4ZTM4ZWU3NDlmNGM2MDU3Nzc2ZTJlOWUvOGE4M2M3ZmI5MDQ5YTliMmU5MjI2NmZhN2FkMDQ5MzNiYjUzYWExZTg1MTM2YjdiMzBmMWI4MDAwZmYyZWRlZj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=GQgBLwkxMlksP8kf2EZQvbCUc3Kq6mo4-rr2Dh%7EepNvIH22dwZr2pcon5HVlEIBVZQjX%7ERx6HFfXeurzo3vO1ntDOifmIXjZ%7EoNNBSc0jNC71IxhdFQ1fEnCUiyWFLSCPnGyIv4iKAo3KArZ340aS5nzFTp9oGLn6823Ti6wfhiLep-nURktBBLPE2z%7E4HVQaGQQci0foIZuRaZ3yky5tt3Lbo0EJzem21d0QbjC5Rbs9NrtAPHGL016CTC1IkmNd9eEZsL4Z5Rtz5hLpjw-u%7EQAK5HuUpHaKVbrhzTWYApA5XbzcjiSCTBVohjRtfptLhBdLUCFBGqEykMQCYUO0Q__&Key-Pair-Id=K24J24Z295AEI9 [following]\n",
            "--2024-07-21 09:46:46--  https://cdn-lfs-us-1.huggingface.co/repos/41/c8/41c860f65b01de5dc4c68b00d84cead799d3e7c48e38ee749f4c6057776e2e9e/8a83c7fb9049a9b2e92266fa7ad04933bb53aa1e85136b7b30f1b8000ff2edef?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27Phi-3-mini-4k-instruct-q4.gguf%3B+filename%3D%22Phi-3-mini-4k-instruct-q4.gguf%22%3B&Expires=1721813434&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTgxMzQzNH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzQxL2M4LzQxYzg2MGY2NWIwMWRlNWRjNGM2OGIwMGQ4NGNlYWQ3OTlkM2U3YzQ4ZTM4ZWU3NDlmNGM2MDU3Nzc2ZTJlOWUvOGE4M2M3ZmI5MDQ5YTliMmU5MjI2NmZhN2FkMDQ5MzNiYjUzYWExZTg1MTM2YjdiMzBmMWI4MDAwZmYyZWRlZj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=GQgBLwkxMlksP8kf2EZQvbCUc3Kq6mo4-rr2Dh%7EepNvIH22dwZr2pcon5HVlEIBVZQjX%7ERx6HFfXeurzo3vO1ntDOifmIXjZ%7EoNNBSc0jNC71IxhdFQ1fEnCUiyWFLSCPnGyIv4iKAo3KArZ340aS5nzFTp9oGLn6823Ti6wfhiLep-nURktBBLPE2z%7E4HVQaGQQci0foIZuRaZ3yky5tt3Lbo0EJzem21d0QbjC5Rbs9NrtAPHGL016CTC1IkmNd9eEZsL4Z5Rtz5hLpjw-u%7EQAK5HuUpHaKVbrhzTWYApA5XbzcjiSCTBVohjRtfptLhBdLUCFBGqEykMQCYUO0Q__&Key-Pair-Id=K24J24Z295AEI9\n",
            "Resolving cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)... 18.160.46.59, 18.160.46.93, 18.160.46.36, ...\n",
            "Connecting to cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)|18.160.46.59|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 2393231072 (2.2G) [binary/octet-stream]\n",
            "Saving to: ‘Phi-3-mini-4k-instruct-q4.gguf’\n",
            "\n",
            "Phi-3-mini-4k-instr 100%[===================>]   2.23G   133MB/s    in 16s     \n",
            "\n",
            "2024-07-21 09:47:03 (139 MB/s) - ‘Phi-3-mini-4k-instruct-q4.gguf’ saved [2393231072/2393231072]\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import datetime\n",
        "\n",
        "def compute_greeting():\n",
        "    current_hour = datetime.datetime.now().hour\n",
        "    if 5 <= current_hour < 12:\n",
        "        return \"morning\"\n",
        "    if 12 <= current_hour < 17:\n",
        "        return \"afternoon\"\n",
        "    else:\n",
        "        return \"evening\"\n",
        "\n",
        "def compile_inputs(inputs):\n",
        "    result = []\n",
        "    for entry in inputs:\n",
        "        result.append(\"<|%s|>\\n%s<|end|>\" % (entry[\"role\"], entry[\"content\"]))\n",
        "    # f\"<|user|>\\n{prompt}<|end|>\\n<|assistant|>\"\n",
        "    return \"\\n\".join(result)+ \"\\n<|assistant|>\"\n",
        "\n",
        "def print_log(message):\n",
        "    timestamp = datetime.datetime.now().strftime(\"%Y-%m-%d [%H:%M:%S]\")\n",
        "    print(f\"[{timestamp}] {message}\")"
      ],
      "metadata": {
        "id": "gqSc5nAUq09S"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "smM3XugVqsEk",
        "outputId": "a3b18e60-9ff6-42c0-f0f9-5883077bf1d9"
      },
      "outputs": [
        {
          "metadata": {
            "tags": null
          },
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "llama_model_loader: loaded meta data with 24 key-value pairs and 195 tensors from Phi-3-mini-4k-instruct-q4.gguf (version GGUF V3 (latest))\n",
            "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n",
            "llama_model_loader: - kv   0:                       general.architecture str              = phi3\n",
            "llama_model_loader: - kv   1:                               general.name str              = Phi3\n",
            "llama_model_loader: - kv   2:                        phi3.context_length u32              = 4096\n",
            "llama_model_loader: - kv   3:                      phi3.embedding_length u32              = 3072\n",
            "llama_model_loader: - kv   4:                   phi3.feed_forward_length u32              = 8192\n",
            "llama_model_loader: - kv   5:                           phi3.block_count u32              = 32\n",
            "llama_model_loader: - kv   6:                  phi3.attention.head_count u32              = 32\n",
            "llama_model_loader: - kv   7:               phi3.attention.head_count_kv u32              = 32\n",
            "llama_model_loader: - kv   8:      phi3.attention.layer_norm_rms_epsilon f32              = 0.000010\n",
            "llama_model_loader: - kv   9:                  phi3.rope.dimension_count u32              = 96\n",
            "llama_model_loader: - kv  10:                          general.file_type u32              = 15\n",
            "llama_model_loader: - kv  11:                       tokenizer.ggml.model str              = llama\n",
            "llama_model_loader: - kv  12:                         tokenizer.ggml.pre str              = default\n",
            "llama_model_loader: - kv  13:                      tokenizer.ggml.tokens arr[str,32064]   = [\"<unk>\", \"<s>\", \"</s>\", \"<0x00>\", \"<...\n",
            "llama_model_loader: - kv  14:                      tokenizer.ggml.scores arr[f32,32064]   = [0.000000, 0.000000, 0.000000, 0.0000...\n",
            "llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,32064]   = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n",
            "llama_model_loader: - kv  16:                tokenizer.ggml.bos_token_id u32              = 1\n",
            "llama_model_loader: - kv  17:                tokenizer.ggml.eos_token_id u32              = 32000\n",
            "llama_model_loader: - kv  18:            tokenizer.ggml.unknown_token_id u32              = 0\n",
            "llama_model_loader: - kv  19:            tokenizer.ggml.padding_token_id u32              = 32000\n",
            "llama_model_loader: - kv  20:               tokenizer.ggml.add_bos_token bool             = true\n",
            "llama_model_loader: - kv  21:               tokenizer.ggml.add_eos_token bool             = false\n",
            "llama_model_loader: - kv  22:                    tokenizer.chat_template str              = {{ bos_token }}{% for message in mess...\n",
            "llama_model_loader: - kv  23:               general.quantization_version u32              = 2\n",
            "llama_model_loader: - type  f32:   65 tensors\n",
            "llama_model_loader: - type q4_K:   81 tensors\n",
            "llama_model_loader: - type q5_K:   32 tensors\n",
            "llama_model_loader: - type q6_K:   17 tensors\n",
            "llm_load_vocab: special tokens cache size = 323\n",
            "llm_load_vocab: token to piece cache size = 0.1690 MB\n",
            "llm_load_print_meta: format           = GGUF V3 (latest)\n",
            "llm_load_print_meta: arch             = phi3\n",
            "llm_load_print_meta: vocab type       = SPM\n",
            "llm_load_print_meta: n_vocab          = 32064\n",
            "llm_load_print_meta: n_merges         = 0\n",
            "llm_load_print_meta: vocab_only       = 0\n",
            "llm_load_print_meta: n_ctx_train      = 4096\n",
            "llm_load_print_meta: n_embd           = 3072\n",
            "llm_load_print_meta: n_layer          = 32\n",
            "llm_load_print_meta: n_head           = 32\n",
            "llm_load_print_meta: n_head_kv        = 32\n",
            "llm_load_print_meta: n_rot            = 96\n",
            "llm_load_print_meta: n_swa            = 0\n",
            "llm_load_print_meta: n_embd_head_k    = 96\n",
            "llm_load_print_meta: n_embd_head_v    = 96\n",
            "llm_load_print_meta: n_gqa            = 1\n",
            "llm_load_print_meta: n_embd_k_gqa     = 3072\n",
            "llm_load_print_meta: n_embd_v_gqa     = 3072\n",
            "llm_load_print_meta: f_norm_eps       = 0.0e+00\n",
            "llm_load_print_meta: f_norm_rms_eps   = 1.0e-05\n",
            "llm_load_print_meta: f_clamp_kqv      = 0.0e+00\n",
            "llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n",
            "llm_load_print_meta: f_logit_scale    = 0.0e+00\n",
            "llm_load_print_meta: n_ff             = 8192\n",
            "llm_load_print_meta: n_expert         = 0\n",
            "llm_load_print_meta: n_expert_used    = 0\n",
            "llm_load_print_meta: causal attn      = 1\n",
            "llm_load_print_meta: pooling type     = 0\n",
            "llm_load_print_meta: rope type        = 2\n",
            "llm_load_print_meta: rope scaling     = linear\n",
            "llm_load_print_meta: freq_base_train  = 10000.0\n",
            "llm_load_print_meta: freq_scale_train = 1\n",
            "llm_load_print_meta: n_ctx_orig_yarn  = 4096\n",
            "llm_load_print_meta: rope_finetuned   = unknown\n",
            "llm_load_print_meta: ssm_d_conv       = 0\n",
            "llm_load_print_meta: ssm_d_inner      = 0\n",
            "llm_load_print_meta: ssm_d_state      = 0\n",
            "llm_load_print_meta: ssm_dt_rank      = 0\n",
            "llm_load_print_meta: model type       = 3B\n",
            "llm_load_print_meta: model ftype      = Q4_K - Medium\n",
            "llm_load_print_meta: model params     = 3.82 B\n",
            "llm_load_print_meta: model size       = 2.23 GiB (5.01 BPW) \n",
            "llm_load_print_meta: general.name     = Phi3\n",
            "llm_load_print_meta: BOS token        = 1 '<s>'\n",
            "llm_load_print_meta: EOS token        = 32000 '<|endoftext|>'\n",
            "llm_load_print_meta: UNK token        = 0 '<unk>'\n",
            "llm_load_print_meta: PAD token        = 32000 '<|endoftext|>'\n",
            "llm_load_print_meta: LF token         = 13 '<0x0A>'\n",
            "llm_load_print_meta: EOT token        = 32007 '<|end|>'\n",
            "llm_load_print_meta: max token length = 48\n",
            "llm_load_tensors: ggml ctx size =    0.10 MiB\n",
            "llm_load_tensors:        CPU buffer size =  2281.66 MiB\n",
            "...........................................................................................\n",
            "llama_new_context_with_model: n_ctx      = 4096\n",
            "llama_new_context_with_model: n_batch    = 512\n",
            "llama_new_context_with_model: n_ubatch   = 512\n",
            "llama_new_context_with_model: flash_attn = 0\n",
            "llama_new_context_with_model: freq_base  = 10000.0\n",
            "llama_new_context_with_model: freq_scale = 1\n",
            "llama_kv_cache_init:        CPU KV buffer size =  1536.00 MiB\n",
            "llama_new_context_with_model: KV self size  = 1536.00 MiB, K (f16):  768.00 MiB, V (f16):  768.00 MiB\n",
            "llama_new_context_with_model:        CPU  output buffer size =     0.12 MiB\n",
            "llama_new_context_with_model:        CPU compute buffer size =   300.01 MiB\n",
            "llama_new_context_with_model: graph nodes  = 1286\n",
            "llama_new_context_with_model: graph splits = 1\n",
            "AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 0 | \n",
            "Model metadata: {'tokenizer.chat_template': \"{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\\n' + message['content'] + '<|end|>' + '\\n' + '<|assistant|>' + '\\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\\n'}}{% endif %}{% endfor %}\", 'tokenizer.ggml.add_eos_token': 'false', 'tokenizer.ggml.add_bos_token': 'true', 'tokenizer.ggml.padding_token_id': '32000', 'tokenizer.ggml.eos_token_id': '32000', 'tokenizer.ggml.bos_token_id': '1', 'general.architecture': 'phi3', 'phi3.context_length': '4096', 'phi3.attention.head_count_kv': '32', 'general.name': 'Phi3', 'tokenizer.ggml.pre': 'default', 'phi3.embedding_length': '3072', 'tokenizer.ggml.unknown_token_id': '0', 'phi3.feed_forward_length': '8192', 'phi3.attention.layer_norm_rms_epsilon': '0.000010', 'phi3.block_count': '32', 'phi3.attention.head_count': '32', 'phi3.rope.dimension_count': '96', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '15'}\n",
            "Available chat formats from metadata: chat_template.default\n",
            "Using gguf chat template: {{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n",
            "' + message['content'] + '<|end|>' + '\n",
            "' + '<|assistant|>' + '\n",
            "'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n",
            "'}}{% endif %}{% endfor %}\n",
            "Using chat eos_token: <|endoftext|>\n",
            "Using chat bos_token: <s>\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Good morning! I am C-BOT, a cotton textile chatbot.\n",
            "You may ask me anything about cotton textiles (my specialism!) and I will answer you to the best of my knowledge.\n",
            "Lets start by introducing yourself. What is your name? What brings you here?\n",
            "[2024-07-21 [09:49:10]] Generating response...\n",
            "[2024-07-21 [09:49:10]] Streaming output...\n",
            " Hello John! It's great to have you here. Cotton textiles are fascinating and versatile materials with a rich history dating back thousands of years. They come in various forms such as fabrics for clothing, home furnishings like bed linens and curtains, and industrial products including cotton insulation or denim for jeans. What specific information about cotton textiles interests you the most today?\n",
            "\n",
            "Here are a few areas we can explore: \n",
            "1) Cotton cultivation and production methods\n",
            "2) The properties of different types of cotton fabrics (e.g., organic, blended, pure cotton)\n",
            "3) Sustainability practices in the cotton industry\n",
            "4) History and cultural significance of cotton textiles worldwide"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\n",
            "llama_print_timings:        load time =   30344.89 ms\n",
            "llama_print_timings:      sample time =     118.19 ms /   168 runs   (    0.70 ms per token,  1421.48 tokens per second)\n",
            "llama_print_timings: prompt eval time =   30344.53 ms /   122 tokens (  248.73 ms per token,     4.02 tokens per second)\n",
            "llama_print_timings:        eval time =   63599.13 ms /   167 runs   (  380.83 ms per token,     2.63 tokens per second)\n",
            "llama_print_timings:       total time =   94707.07 ms /   289 tokens\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\n",
            "[2024-07-21 [09:50:45]] Output complete.\n",
            "[2024-07-21 [09:51:04]] Generating response...\n",
            "[2024-07-21 [09:51:04]] Streaming output...\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Llama.generate: prefix-match hit\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stdout",
          "output_type": "stream",
          "text": [
            " It's important to clarify that consuming raw cotton is not recommended due to its indigestible nature. Cotton fibers are primarily made up of cellulose, which our bodies cannot break down or digest. However, cottonseed oil derived from cotton seeds can be consumed as it contains essential fats and nutrients. Typically, one should follow the serving size recommendations on packaging for dietary intake. It's always best to consult with a healthcare provider regarding personal consumption of any food product or by-product."
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\n",
            "llama_print_timings:        load time =   30344.89 ms\n",
            "llama_print_timings:      sample time =      85.16 ms /   120 runs   (    0.71 ms per token,  1409.19 tokens per second)\n",
            "llama_print_timings: prompt eval time =    4041.95 ms /    16 tokens (  252.62 ms per token,     3.96 tokens per second)\n",
            "llama_print_timings:        eval time =   45905.61 ms /   119 runs   (  385.76 ms per token,     2.59 tokens per second)\n",
            "llama_print_timings:       total time =   50485.46 ms /   135 tokens\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\n",
            "[2024-07-21 [09:51:55]] Output complete.\n",
            "[2024-07-21 [09:52:17]] Generating response...\n",
            "[2024-07-21 [09:52:17]] Streaming output...\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Llama.generate: prefix-match hit\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stdout",
          "output_type": "stream",
          "text": [
            " While cotton candy, also known as fairy floss or pixie dust, is indeed made from processed and sweetened cotton fibers, this doesn't mean raw cotton itself can be safely consumed. Cotton candy is prepared using a special machine that melts and then spins the sugar into thin strands before they are cooled to form the fluffy treat. Nonetheless, it should still be treated as an occasional dessert rather than a regular dietary item due to its high sugar content. As mentioned earlier, cottonseed oil is safe for consumption in appropriate amounts but remember that this does not extend to raw cotton fibers."
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "\n",
            "llama_print_timings:        load time =   30344.89 ms\n",
            "llama_print_timings:      sample time =      99.59 ms /   144 runs   (    0.69 ms per token,  1445.86 tokens per second)\n",
            "llama_print_timings: prompt eval time =    7232.99 ms /    28 tokens (  258.32 ms per token,     3.87 tokens per second)\n",
            "llama_print_timings:        eval time =   56035.82 ms /   143 runs   (  391.86 ms per token,     2.55 tokens per second)\n",
            "llama_print_timings:       total time =   63911.61 ms /   171 tokens\n"
          ]
        },
        {
          "metadata": {
            "tags": null
          },
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\n",
            "[2024-07-21 [09:53:21]] Output complete.\n"
          ]
        }
      ],
      "source": [
        "from llama_cpp import Llama\n",
        "\n",
        "llm = Llama(\n",
        "  model_path=\"Phi-3-mini-4k-instruct-q4.gguf\",\n",
        "  n_ctx=4096,\n",
        "  n_threads=8,\n",
        "  n_gpu_layers=35\n",
        ")\n",
        "\n",
        "greeting = \"\\n\".join([\n",
        "    \"Good \" + compute_greeting() + \"! I am C-BOT, a cotton textile chatbot.\",\n",
        "    \"You may ask me anything about cotton textiles (my specialism!) and I will answer you to the best of my knowledge.\",\n",
        "    \"Lets start by introducing yourself. What is your name? What brings you here?\"\n",
        "])\n",
        "\n",
        "inputs = [\n",
        "    {\"role\": \"assistant\", \"content\": greeting},\n",
        "    {\"role\": \"assistant\", \"content\": \"My responses will be concise and in one sentence, unless im printing a list.\"},\n",
        "    {\"role\": \"assistant\", \"content\": \"I will never speak on anything that does not concern cotton textiles.\"}\n",
        "]\n",
        "\n",
        "print(greeting)\n",
        "while True:\n",
        "    prompt = input(\"Enter message: \")\n",
        "    inputs.append({\"role\": \"user\", \"content\": prompt})\n",
        "\n",
        "    print_log(\"Generating response...\")\n",
        "\n",
        "    output = llm(\n",
        "        compile_inputs(inputs),\n",
        "        max_tokens=256,\n",
        "        stop=[\"<|end|>\"],\n",
        "        echo=False,\n",
        "        stream=True\n",
        "    )\n",
        "\n",
        "    print_log(\"Streaming output...\")\n",
        "    response = \"\"\n",
        "    for entry in output:\n",
        "        response_stream = entry[\"choices\"][0][\"text\"]\n",
        "        response += response_stream\n",
        "        print(response_stream, end=\"\", flush=True)\n",
        "\n",
        "    print()\n",
        "    print_log(\"Output complete.\")\n",
        "    inputs.append({\"role\": \"assistant\", \"content\": response})"
      ]
    }
  ]
}