ninehills/chatglm-openai-api.ipynb

## chatglm-openai-api.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "gpuClass": "standard"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# **本地版本的在： https://github.com/ninehills/chatglm-openai-api  后续主要维护本地版本！！！**"
      ],
      "metadata": {
        "id": "wwkHnLlNRAgm"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 安装依赖"
      ],
      "metadata": {
        "id": "MZyEDzEHyDcN"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "eNbQgYuLxp-3",
        "outputId": "61a6f8a0-5c12-46c4-d4b8-0d71c7d8858d"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: protobuf==3.20.0 in /usr/local/lib/python3.9/dist-packages (3.20.0)\n",
            "Requirement already satisfied: transformers==4.27.1 in /usr/local/lib/python3.9/dist-packages (4.27.1)\n",
            "Requirement already satisfied: icetk in /usr/local/lib/python3.9/dist-packages (0.0.4)\n",
            "Requirement already satisfied: cpm_kernels in /usr/local/lib/python3.9/dist-packages (1.0.11)\n",
            "Requirement already satisfied: torch in /usr/local/lib/python3.9/dist-packages (2.0.0+cu118)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (3.11.0)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (1.22.4)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (2.27.1)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (4.65.0)\n",
            "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (0.13.3)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (6.0)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (2022.10.31)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (23.1)\n",
            "Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (0.13.4)\n",
            "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (from icetk) (0.1.98)\n",
            "Requirement already satisfied: torchvision in /usr/local/lib/python3.9/dist-packages (from icetk) (0.15.1+cu118)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.9/dist-packages (from torch) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from torch) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.9/dist-packages (from torch) (2.0.0)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.9/dist-packages (from torch) (4.5.0)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from torch) (1.11.1)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (3.25.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (16.0.1)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->torch) (2.1.2)\n",
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (2.0.12)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (2022.12.7)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (3.4)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (1.26.15)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->torch) (1.3.0)\n",
            "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.9/dist-packages (from torchvision->icetk) (8.4.0)\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: fastapi in /usr/local/lib/python3.9/dist-packages (0.95.1)\n",
            "Requirement already satisfied: pydantic in /usr/local/lib/python3.9/dist-packages (1.10.7)\n",
            "Requirement already satisfied: uvicorn in /usr/local/lib/python3.9/dist-packages (0.21.1)\n",
            "Requirement already satisfied: sse_starlette in /usr/local/lib/python3.9/dist-packages (1.3.4)\n",
            "Requirement already satisfied: pyngrok in /usr/local/lib/python3.9/dist-packages (6.0.0)\n",
            "Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.9/dist-packages (1.5.6)\n",
            "Requirement already satisfied: starlette<0.27.0,>=0.26.1 in /usr/local/lib/python3.9/dist-packages (from fastapi) (0.26.1)\n",
            "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic) (4.5.0)\n",
            "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.9/dist-packages (from uvicorn) (0.14.0)\n",
            "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn) (8.1.3)\n",
            "Requirement already satisfied: PyYAML in /usr/local/lib/python3.9/dist-packages (from pyngrok) (6.0)\n",
            "Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.9/dist-packages (from starlette<0.27.0,>=0.26.1->fastapi) (3.6.2)\n",
            "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi) (1.3.0)\n",
            "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi) (3.4)\n"
          ]
        }
      ],
      "source": [
        "import locale\n",
        "locale.getpreferredencoding = lambda: \"UTF-8\"\n",
        "\n",
        "!pip install protobuf==3.20.0 transformers==4.27.1 icetk cpm_kernels torch\n",
        "!pip install fastapi pydantic uvicorn sse_starlette pyngrok nest-asyncio"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 环境配置"
      ],
      "metadata": {
        "id": "VaY7_gNOzdoG"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "if torch.cuda.is_available() == False:\n",
        "    print(\"请在右上方 Colab 运行时类型中，选择 GPU 类型的运行时。\")"
      ],
      "metadata": {
        "id": "rVBP-rhiQhT5"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "chatglm_models = [\n",
        "    \"THUDM/chatglm-6b\",       # 原始模型\n",
        "    \"THUDM/chatglm-6b-int8\",  # int8 量化\n",
        "    \"THUDM/chatglm-6b-int4\",  # int4 量化\n",
        "]\n",
        "\n",
        "CHATGLM_MODEL = \"THUDM/chatglm-6b-int4\"\n",
        "\n",
        "# GPU/CPU\n",
        "RUNNING_DEVICE = \"GPU\"\n",
        "\n",
        "# API_TOKEN\n",
        "TOKEN = \"token1\"\n"
      ],
      "metadata": {
        "id": "JJOlpnVOyIeG"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 启动模型"
      ],
      "metadata": {
        "id": "CFzHRCMYz08N"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import AutoModel, AutoTokenizer\n",
        "\n",
        "def init_chatglm(model_name: str, running_device: str):\n",
        "    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
        "    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)\n",
        "\n",
        "    if running_device == \"GPU\":\n",
        "        model = model.half().cuda()\n",
        "    else:\n",
        "        model = model.float()\n",
        "    model.eval()\n",
        "    return tokenizer, model\n",
        "\n",
        "tokenizer, model = init_chatglm(CHATGLM_MODEL, RUNNING_DEVICE)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "VIT1nTv_1a4X",
        "outputId": "fe2fe031-4f83-4c20-b882-19682c528647"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.\n",
            "Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.\n",
            "Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "No compiled kernel found.\n",
            "Compiling kernels : /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.c\n",
            "Compiling gcc -O3 -fPIC -std=c99 /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.c -shared -o /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.so\n",
            "Load kernel : /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.so\n",
            "Using quantization cache\n",
            "Applying quantization to glm layers\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 测试模型"
      ],
      "metadata": {
        "id": "b5RuRcsD3hPw"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "response, history = model.chat(tokenizer, \"你好\", history=[])\n",
        "print(response)\n",
        "print(history)\n",
        "response, history = model.chat(tokenizer, \"很高兴认识你\", history=history)\n",
        "print(response)\n",
        "print(history)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ZFY3ju-N3gk1",
        "outputId": "184af497-1c9a-46bc-b76c-c4847bebf1ee"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "WARNING:transformers_modules.THUDM.chatglm-6b-int4.e02ba894cf18f3fd9b2526c795f983683c4ec732.modeling_chatglm:The dtype of attention mask (torch.int64) is not bool\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "你好👋！我是人工智能助手 ChatGLM-6B，很高兴见到你，欢迎问我任何问题。\n",
            "[('你好', '你好👋！我是人工智能助手 ChatGLM-6B，很高兴见到你，欢迎问我任何问题。')]\n",
            "非常感谢你的欢迎！作为一名人工智能助手，我的任务是帮助人们和计算机之间进行更好的交流和理解。我很高兴能够提供帮助和回答问题，如果有任何疑问，随时都可以问我。\n",
            "[('你好', '你好👋！我是人工智能助手 ChatGLM-6B，很高兴见到你，欢迎问我任何问题。'), ('很高兴认识你', '非常感谢你的欢迎！作为一名人工智能助手，我的任务是帮助人们和计算机之间进行更好的交流和理解。我很高兴能够提供帮助和回答问题，如果有任何疑问，随时都可以问我。')]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 启动服务器（with tunnel）"
      ],
      "metadata": {
        "id": "LnexGhqK39NJ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!wget https://github.com/ninehills/chatgpt-web/releases/download/1.0/dist.tgz -O dist.tgz\n",
        "!tar zxvf dist.tgz\n",
        "!mv ./dist/index.html ./dist/assets/"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Up0KMv4vRtsP",
        "outputId": "6f3c9eaf-e1b2-4666-89db-c4664c872563"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "--2023-04-21 11:14:14--  https://github.com/ninehills/chatgpt-web/releases/download/1.0/dist.tgz\n",
            "Resolving github.com (github.com)... 192.30.255.112\n",
            "Connecting to github.com (github.com)|192.30.255.112|:443... connected.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/630857471/b1a34dc5-00fe-4969-b2dc-47af921d7dbf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230421%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230421T111414Z&X-Amz-Expires=300&X-Amz-Signature=07ddaa9df7f8be4f3c4c4f484d2bdee8034f2963dcaaa2ea567ade506647179f&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=630857471&response-content-disposition=attachment%3B%20filename%3Ddist.tgz&response-content-type=application%2Foctet-stream [following]\n",
            "--2023-04-21 11:14:14--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/630857471/b1a34dc5-00fe-4969-b2dc-47af921d7dbf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230421%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230421T111414Z&X-Amz-Expires=300&X-Amz-Signature=07ddaa9df7f8be4f3c4c4f484d2bdee8034f2963dcaaa2ea567ade506647179f&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=630857471&response-content-disposition=attachment%3B%20filename%3Ddist.tgz&response-content-type=application%2Foctet-stream\n",
            "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.108.133, ...\n",
            "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.109.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 104402 (102K) [application/octet-stream]\n",
            "Saving to: ‘dist.tgz’\n",
            "\n",
            "dist.tgz            100%[===================>] 101.96K  --.-KB/s    in 0.02s   \n",
            "\n",
            "2023-04-21 11:14:14 (6.55 MB/s) - ‘dist.tgz’ saved [104402/104402]\n",
            "\n",
            "./dist/\n",
            "./dist/index.html\n",
            "./dist/assets/\n",
            "./dist/assets/index-50bc3425.css\n",
            "./dist/assets/index-8effcbc5.js\n",
            "./dist/assets/logo-66789ed1.svg\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "from fastapi import FastAPI, Request, status, HTTPException\n",
        "from fastapi.responses import JSONResponse, HTMLResponse\n",
        "from pydantic import BaseModel\n",
        "from sse_starlette.sse import EventSourceResponse\n",
        "from fastapi.responses import RedirectResponse\n",
        "from fastapi.staticfiles import StaticFiles\n",
        "from fastapi.middleware.cors import CORSMiddleware\n",
        "import uvicorn\n",
        "import json\n",
        "from typing import List, Optional\n",
        "\n",
        "\n",
        "# 参考 https://github.com/josStorer/selfhostedAI/blob/master/main.py\n",
        "\n",
        "def torch_gc():\n",
        "    if torch.cuda.is_available():\n",
        "        with torch.cuda.device(0):\n",
        "            torch.cuda.empty_cache()\n",
        "            torch.cuda.ipc_collect()\n",
        "\n",
        "\n",
        "app = FastAPI()\n",
        "\n",
        "app.add_middleware(\n",
        "    CORSMiddleware,\n",
        "    allow_origins=['*'],\n",
        "    allow_credentials=True,\n",
        "    allow_methods=['*'],\n",
        "    allow_headers=['*'],\n",
        ")\n",
        "\n",
        "app.mount(\"/assets\", StaticFiles(directory=\"dist/assets\"), name=\"assets\")\n",
        "\n",
        "\n",
        "class Message(BaseModel):\n",
        "    role: str\n",
        "    content: str\n",
        "\n",
        "\n",
        "class Body(BaseModel):\n",
        "    messages: List[Message]\n",
        "    model: str\n",
        "    stream: Optional[bool] = False\n",
        "    max_tokens: Optional[int] = 256\n",
        "    temperature: Optional[float] = 0.95\n",
        "    top_p: Optional[float] = 0.7\n",
        "\n",
        "\n",
        "\n",
        "@app.get(\"/\")\n",
        "def read_root():\n",
        "    return RedirectResponse(\"/assets/index.html\")\n",
        "\n",
        "\n",
        "@app.get(\"/v1/models\")\n",
        "def get_models():\n",
        "    return {\"data\": [\n",
        "      {\n",
        "        \"created\": 1677610602,\n",
        "        \"id\": \"gpt-3.5-turbo\",\n",
        "        \"object\": \"model\",\n",
        "        \"owned_by\": \"openai\",\n",
        "        \"permission\": [\n",
        "          {\n",
        "            \"created\": 1680818747,\n",
        "            \"id\": \"modelperm-fTUZTbzFp7uLLTeMSo9ks6oT\",\n",
        "            \"object\": \"model_permission\",\n",
        "            \"allow_create_engine\": False,\n",
        "            \"allow_sampling\": True,\n",
        "            \"allow_logprobs\": True,\n",
        "            \"allow_search_indices\": False,\n",
        "            \"allow_view\": True,\n",
        "            \"allow_fine_tuning\": False,\n",
        "            \"organization\": \"*\",\n",
        "            \"group\": None,\n",
        "            \"is_blocking\": False\n",
        "          }\n",
        "        ],\n",
        "        \"root\": \"gpt-3.5-turbo\",\n",
        "        \"parent\": None,\n",
        "      },\n",
        "    ],\n",
        "    \"object\": \"list\"\n",
        "  }\n",
        "\n",
        "def generate_response(content: str):\n",
        "    return {\n",
        "        \"id\": \"chatcmpl-77PZm95TtxE0oYLRx3cxa6HtIDI7s\",\n",
        "        \"object\": \"chat.completion\",\n",
        "        \"created\": 1682000966,\n",
        "        \"model\": \"gpt-3.5-turbo-0301\",\n",
        "        \"usage\": {\n",
        "            \"prompt_tokens\": 10,\n",
        "            \"completion_tokens\": 10,\n",
        "            \"total_tokens\": 20,\n",
        "        },\n",
        "        \"choices\": [{\n",
        "            \"message\": {\"role\": \"assistant\", \"content\": content}, \"finish_reason\": \"stop\", \"index\": 0}\n",
        "        ]\n",
        "    }\n",
        "\n",
        "def generate_stream_response_start():\n",
        "    return {\"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\"object\":\"chat.completion.chunk\",\"created\":1682004627,\"model\":\"gpt-3.5-turbo-0301\",\"choices\":[{\"delta\":{\"role\":\"assistant\"},\"index\":0,\"finish_reason\":None}]}\n",
        "\n",
        "def generate_stream_response(content: str):\n",
        "    return {\n",
        "        \"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\n",
        "        \"object\":\"chat.completion.chunk\",\n",
        "        \"created\":1682004627,\n",
        "        \"model\":\"gpt-3.5-turbo-0301\",\n",
        "        \"choices\":[{\"delta\":{\"content\":content},\"index\":0,\"finish_reason\":None}\n",
        "    ]}\n",
        "\n",
        "def generate_stream_response_stop():\n",
        "    return {\"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\"object\":\"chat.completion.chunk\",\"created\":1682004627,\"model\":\"gpt-3.5-turbo-0301\",\"choices\":[{\"delta\":{},\"index\":0,\"finish_reason\":\"stop\"}]}\n",
        "\n",
        "@app.post(\"/v1/chat/completions\")\n",
        "async def completions(body: Body, request: Request):\n",
        "    # Cancel token\n",
        "    #if request.headers.get(\"Authorization\").split(\" \")[1] != TOKEN:\n",
        "    #    raise HTTPException(status.HTTP_401_UNAUTHORIZED, \"Token is wrong!\")\n",
        "    \n",
        "    torch_gc()\n",
        "\n",
        "    question = body.messages[-1]\n",
        "    if question.role == 'user':\n",
        "        question = question.content\n",
        "    else:\n",
        "        raise HTTPException(status.HTTP_400_BAD_REQUEST, \"No Question Found\")\n",
        "\n",
        "    history = []\n",
        "    user_question = ''\n",
        "    for message in body.messages:\n",
        "        if message.role == 'system':\n",
        "            history.append((message.content, \"OK\"))\n",
        "        if message.role == 'user':\n",
        "            user_question = message.content\n",
        "        elif message.role == 'assistant':\n",
        "            assistant_answer = message.content\n",
        "            history.append((user_question, assistant_answer))\n",
        "\n",
        "    print(f\"question = {question}, history = {history}\")\n",
        "\n",
        "    \n",
        "    if body.stream:\n",
        "        async def eval_chatglm():\n",
        "            sends = 0\n",
        "            first = True\n",
        "            for response, _ in model.stream_chat(\n",
        "                tokenizer, question, history,\n",
        "                temperature=body.temperature,\n",
        "                top_p=body.top_p,\n",
        "                max_length=max(2048, body.max_tokens)):\n",
        "                if await request.is_disconnected():\n",
        "                    return\n",
        "                ret = response[sends:]\n",
        "                sends = len(response)\n",
        "                if first:\n",
        "                    first = False\n",
        "                    yield json.dumps(generate_stream_response_start(), ensure_ascii=False)\n",
        "                yield json.dumps(generate_stream_response(ret), ensure_ascii=False)\n",
        "            yield json.dumps(generate_stream_response_stop(), ensure_ascii=False)\n",
        "            yield \"[DONE]\"\n",
        "        return EventSourceResponse(eval_chatglm(), ping=10000)\n",
        "    else:\n",
        "        response, _ = model.chat(\n",
        "            tokenizer, question, history,\n",
        "            temperature=body.temperature,\n",
        "            top_p=body.top_p,\n",
        "            max_length=max(2048, body.max_tokens))\n",
        "        print(f\"response: {response}\")\n",
        "        return JSONResponse(content=generate_response(response))"
      ],
      "metadata": {
        "id": "tX5oiLQJ4BTX"
      },
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared\n",
        "!chmod a+x cloudflared\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Cej2JQTKXZwR",
        "outputId": "ff5ee79d-af42-4a32-ebd1-dc0d26d6815c"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "--2023-04-21 11:14:53--  https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64\n",
            "Resolving github.com (github.com)... 192.30.255.113\n",
            "Connecting to github.com (github.com)|192.30.255.113|:443... connected.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://github.com/cloudflare/cloudflared/releases/download/2023.4.1/cloudflared-linux-amd64 [following]\n",
            "--2023-04-21 11:14:53--  https://github.com/cloudflare/cloudflared/releases/download/2023.4.1/cloudflared-linux-amd64\n",
            "Reusing existing connection to github.com:443.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/106867604/fdb83b81-e6e9-461a-bea7-838deb4b1387?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230421%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230421T111453Z&X-Amz-Expires=300&X-Amz-Signature=957bcb143be393c13ac68884edb62ed105de2954d10c8ceee08b96a4f625a9fe&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=106867604&response-content-disposition=attachment%3B%20filename%3Dcloudflared-linux-amd64&response-content-type=application%2Foctet-stream [following]\n",
            "--2023-04-21 11:14:53--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/106867604/fdb83b81-e6e9-461a-bea7-838deb4b1387?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230421%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230421T111453Z&X-Amz-Expires=300&X-Amz-Signature=957bcb143be393c13ac68884edb62ed105de2954d10c8ceee08b96a4f625a9fe&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=106867604&response-content-disposition=attachment%3B%20filename%3Dcloudflared-linux-amd64&response-content-type=application%2Foctet-stream\n",
            "Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
            "Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.108.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 36184431 (35M) [application/octet-stream]\n",
            "Saving to: ‘cloudflared’\n",
            "\n",
            "cloudflared         100%[===================>]  34.51M   230MB/s    in 0.2s    \n",
            "\n",
            "2023-04-21 11:14:53 (230 MB/s) - ‘cloudflared’ saved [36184431/36184431]\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 在 Notebook 中运行所需\n",
        "import nest_asyncio\n",
        "nest_asyncio.apply()\n",
        "\n",
        "#from pyngrok import ngrok, conf\n",
        "\n",
        "#ngrok.set_auth_token(os.environ[\"ngrok_token\"])\n",
        "#http_tunnel = ngrok.connect(8000)\n",
        "#print(http_tunnel.public_url)\n",
        "\n",
        "import subprocess\n",
        "print(\"start cloudflared runnel\")\n",
        "f = open(\"stdout\", \"w\")\n",
        "p = subprocess.Popen(['./cloudflared', '--url', 'http://localhost:8000'], bufsize=0, stdout=f, stderr=subprocess.STDOUT)\n",
        "\n",
        "import time\n",
        "\n",
        "time.sleep(3)\n",
        "\n",
        "!grep -F trycloudflare stdout\n",
        "\n",
        "print(\"start app\")\n",
        "import uvicorn\n",
        "uvicorn.run(app, port=8000)\n",
        "\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "6bPIXXdn8dG0",
        "outputId": "7bdcd46b-631d-4f2f-b889-19d125468dbc"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "start cloudflared runnel\n",
            "2023-04-21T11:14:57Z INF Requesting new quick Tunnel on trycloudflare.com...\n",
            "2023-04-21T11:14:58Z INF |  https://settle-debian-brand-relates.trycloudflare.com                                     |\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "INFO:     Started server process [19734]\n",
            "INFO:     Waiting for application startup.\n",
            "INFO:     Application startup complete.\n",
            "INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "start app\n",
            "INFO:     103.181.1.35:0 - \"GET / HTTP/1.1\" 307 Temporary Redirect\n",
            "INFO:     103.181.1.35:0 - \"GET /index.html HTTP/1.1\" 200 OK\n",
            "INFO:     103.181.1.35:0 - \"GET /index-50bc3425.css HTTP/1.1\" 200 OK\n",
            "INFO:     103.181.1.35:0 - \"GET /index-8effcbc5.js HTTP/1.1\" 200 OK\n",
            "INFO:     34.83.203.92:0 - \"GET /index.html HTTP/1.1\" 200 OK\n",
            "INFO:     34.83.203.92:0 - \"GET /index.html HTTP/1.1\" 200 OK\n",
            "INFO:     103.227.66.197:0 - \"GET /index.html HTTP/1.1\" 200 OK\n",
            "INFO:     34.83.203.92:0 - \"GET /index.html HTTP/1.1\" 200 OK\n",
            "INFO:     34.83.203.92:0 - \"GET /index-50bc3425.css HTTP/1.1\" 200 OK\n",
            "INFO:     34.83.203.92:0 - \"GET /index-8effcbc5.js HTTP/1.1\" 200 OK\n",
            "INFO:     103.181.1.35:0 - \"GET /logo-66789ed1.svg HTTP/1.1\" 200 OK\n",
            "INFO:     103.227.66.197:0 - \"GET /index-50bc3425.css HTTP/1.1\" 200 OK\n",
            "INFO:     103.227.66.197:0 - \"GET /index-8effcbc5.js HTTP/1.1\" 200 OK\n",
            "INFO:     34.83.203.92:0 - \"GET /index.html/assets/logo-66789ed1.svg HTTP/1.1\" 404 Not Found\n",
            "INFO:     103.227.66.197:0 - \"GET /logo-66789ed1.svg HTTP/1.1\" 200 OK\n",
            "INFO:     34.83.203.92:0 - \"GET /favicon.ico HTTP/1.1\" 404 Not Found\n",
            "question = 你好, history = []\n",
            "response: 你好👋！我是人工智能助手 ChatGLM-6B，很高兴见到你，欢迎问我任何问题。\n",
            "INFO:     103.181.1.35:0 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n",
            "question = 给我写一首夏天的诗, history = [('你好', '你好👋！我是人工智能助手 ChatGLM-6B，很高兴见到你，欢迎问我任何问题。')]\n",
            "response: 当太阳升起，\n",
            "夏日的清新空气，\n",
            "在微风中拂过，\n",
            "树影摇曳，鸟儿欢唱。\n",
            "\n",
            "蝉鸣声不断，\n",
            "在树上跳跃，\n",
            "游泳池的水清澈，\n",
            "闪闪发光。\n",
            "\n",
            "在海滩上，\n",
            "沙子温暖，\n",
            "阳光照耀，\n",
            "海浪拍打着岸边，\n",
            "人们欢笑着，\n",
            "享受这美好的夏日时光。\n",
            "\n",
            "这是一首关于夏天的诗，\n",
            "一段关于享受和欢乐的记忆，\n",
            "在夏天的阳光下，\n",
            "我们享受着生命的美好。\n",
            "INFO:     103.181.1.35:0 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n",
            "INFO:     103.181.1.35:0 - \"GET /v1/models HTTP/1.1\" 200 OK\n",
            "INFO:     103.181.1.35:0 - \"GET /v1/models HTTP/1.1\" 200 OK\n",
            "INFO:     103.181.1.35:0 - \"GET /v1/models HTTP/1.1\" 200 OK\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## 访问\n",
        "### 页面：\n",
        "\n",
        "请在上一个代码块的输出中找到类似于 https://submitted-skilled-impose-wheat.trycloudflare.com  的地址，在浏览器打开即可。\n",
        "\n",
        "注意可能需要登上10s左右时间，让 tunnel开始工作。\n",
        "\n",
        "Set your OpenAI API key below: 随便填写，比如111\n",
        "\n",
        " \n",
        "### 非流式接口\n",
        "\n",
        "```bash\n",
        "curl -vvv https://6d8f-130-211-208-193.ngrok.io/v1/chat/completions \\\n",
        "  -H \"Content-Type: application/json\" \\\n",
        "  -H \"Authorization: Bearer token1\" \\\n",
        "  -d '{ \"stream\": false,\n",
        "    \"model\": \"gpt-3.5-turbo\",\n",
        "    \"messages\": [{\"role\": \"user\", \"content\": \"写一首夏天的诗\"}]\n",
        "  }'\n",
        "```\n",
        "\n",
        "输出：\n",
        "```json\n",
        "{\"id\":\"chatcmpl-77PZm95TtxE0oYLRx3cxa6HtIDI7s\",\"object\":\"chat.completion\",\"created\":1682000966,\"model\":\"gpt-3.5-turbo-0301\",\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":10,\"total_tokens\":20},\"choices\":[{\"message\":{\"role\":\"assistant\",\"content\":\"夏日的阳光下，\\n树叶闪烁着翠绿的光芒，\\n蝉鸣声不断响起，\\n伴着鸟儿的欢快歌唱。\\n\\n人们穿着轻便的衣服，\\n享受这清凉的夏日时光，\\n在海滩上晒着太阳，\\n喝着清凉的饮料，\\n聊天、欢笑、无忧无虑。\\n\\n清晨的日出，\\n天边呈现出美丽的红色，\\n太阳慢慢地升起，\\n照耀着整个天空。\\n\\n在公园里漫步，\\n欣赏着花草树木的美丽，\\n夏日的天空和大地，\\n让人感到无限的快乐。\\n\\n夏日的风吹过，\\n带来了凉爽的感觉，\\n让人感受到生命的美好，\\n让人感受到夏日的热情。\\n\\n这是一个美好的季节，\\n一个充满欢乐和热情的夏日，\\n让我们珍惜这美好的时光，\\n享受这夏日带来的快乐。\"},\"finish_reason\":\"stop\",\"index\":0}]}\n",
        "```\n",
        "\n",
        "### 流式接口\n",
        "\n",
        "```bash\n",
        "curl -vvv https://6d8f-130-211-208-193.ngrok.io/v1/chat/completions \\\n",
        "  -H \"Content-Type: application/json\" \\\n",
        "  -H \"Authorization: Bearer token1\" \\\n",
        "  -d '{ \"stream\": true, \n",
        "    \"model\": \"gpt-3.5-turbo\",\n",
        "    \"messages\": [{\"role\": \"user\", \"content\": \"写一首夏天的诗\"}]\n",
        "  }'\n",
        "```\n",
        "\n",
        "输出：\n",
        "```json\n",
        "\n",
        "data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"难忘的\"}, \"index\": 0, \"finish_reason\": null}]}\n",
        "\n",
        "data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"夏日\"}, \"index\": 0, \"finish_reason\": null}]}\n",
        "\n",
        "data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"时光\"}, \"index\": 0, \"finish_reason\": null}]}\n",
        "\n",
        "data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"。\"}, \"index\": 0, \"finish_reason\": null}]}\n",
        "\n",
        "data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {}, \"index\": 0, \"finish_reason\": \"stop\"}]}\n",
        "\n",
        "data: [DONE]\n",
        "```\n"
      ],
      "metadata": {
        "id": "xd6IIdSaIUhr"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!ps aux | grep cloudflare"
      ],
      "metadata": {
        "id": "rJeaEOP9X3-4"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}