{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/gist/mhama/ba2ce7cb210fcb5f60dfa60e6e7da89c/LlamaCppPython_JpLLM_Chat_Fast20240518.ipynb)\n", | |
"# 概要\n", | |
"llama-cpp-pythonのバイナリパッケージを利用することで、パラメータ数10B程度までの日本語LLMをおおむね3分以内に実行できます。\n", | |
"\n", | |
"以下の日本語LLMのURLを指定していますが、それぞれのLLMの利用規約に従ってください。\n", | |
"\n", | |
"* [DataPilot-ArrowPro-7B-KUJIRA](https://huggingface.co/DataPilot/ArrowPro-7B-KUJIRA) | [gguf](https://huggingface.co/mmnga/DataPilot-ArrowPro-7B-KUJIRA-gguf)\n", | |
"* [Japanese-Starling-ChatV-7B](https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B-GGUF)\n", | |
"* [Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) | [gguf](https://huggingface.co/mmnga/Phi-3-mini-128k-instruct-gguf)\n", | |
"* [Vecteus-v1](https://huggingface.co/Local-Novel-LLM-project/Vecteus-v1) | [gguf](https://huggingface.co/mmnga/Vecteus-v1-gguf)\n", | |
"\n", | |
"# 設定\n", | |
"\n", | |
"ランタイムは `GPU T4` を選択してください。" | |
],
"metadata": {
"id": "4yF4wM2dS5O8"
}
},
{
"cell_type": "code",
"source": [
"import os\n", | |
"if os.environ[\"COLAB_GPU\"] and int(os.environ[\"COLAB_GPU\"]) > 0:\n", | |
" print(\"OK: GPU接続済み。\\n\")\n", | |
"else:\n", | |
" print(\"★\"*32 + \"\\n WARNING! GPU(T4) ランタイムを選択してください。 \\n\" + \"★\"*32 + \"\\n\")\n", | |
"\n", | |
"# python, CUDAバージョンの表示\n", | |
"!python --version\n", | |
"!nvidia-smi" | |
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "dSL__N_fYg6M",
"outputId": "c31881ca-a138-44bf-e2aa-eea1f40d20df"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"OK: GPU接続済み。\n", | |
"Python 3.10.12\n", | |
"Fri May 17 15:49:06 2024 \n", | |
"+---------------------------------------------------------------------------------------+\n", | |
"| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", | |
"|-----------------------------------------+----------------------+----------------------+\n", | |
"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", | |
"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", | |
"| | | MIG M. |\n", | |
"|=========================================+======================+======================|\n", | |
"| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", | |
"| N/A 43C P8 9W / 70W | 0MiB / 15360MiB | 0% Default |\n", | |
"| | | N/A |\n", | |
"+-----------------------------------------+----------------------+----------------------+\n", | |
" \n", | |
"+---------------------------------------------------------------------------------------+\n", | |
"| Processes: |\n", | |
"| GPU GI CI PID Type Process name GPU Memory |\n", | |
"| ID ID Usage |\n", | |
"|=======================================================================================|\n", | |
"| No running processes found |\n", | |
"+---------------------------------------------------------------------------------------+\n" | |
] | |
} | |
] | |
}, | |
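{
"cell_type": "code",
"source": [
"# (Optional) Cross-check GPU visibility with PyTorch, which is usually preinstalled on Colab.\n",
"# This is only a sanity check and assumes the torch package is importable; the COLAB_GPU\n",
"# check and `nvidia-smi` above are already sufficient on their own.\n",
"import torch\n",
"print(\"CUDA available:\", torch.cuda.is_available())\n",
"if torch.cuda.is_available():\n",
"  print(\"Device:\", torch.cuda.get_device_name(0))"
],
"metadata": {},
"execution_count": null,
"outputs": []
},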
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 150
},
"id": "1r2tETDFsrRY",
"outputId": "05e49095-21de-49cc-f0e2-93254d599ca8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2024-05-17 15:55:33-- https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.73-cu122/llama_cpp_python-0.2.73-cp310-cp310-linux_x86_64.whl\n", | |
"Resolving github.com (github.com)... 140.82.116.3\n", | |
"Connecting to github.com (github.com)|140.82.116.3|:443... connected.\n", | |
"HTTP request sent, awaiting response... 302 Found\n", | |
"Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/617868717/9cae7a92-1c22-4c5a-a818-1efb604e0955?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20240517%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240517T155533Z&X-Amz-Expires=300&X-Amz-Signature=54ef2db59b6a7e4c88dd900d3f1b2ca01b3b22aed86773c32bbd8550f8e4727a&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=617868717&response-content-disposition=attachment%3B%20filename%3Dllama_cpp_python-0.2.73-cp310-cp310-linux_x86_64.whl&response-content-type=application%2Foctet-stream [following]\n", | |
"--2024-05-17 15:55:34-- https://objects.githubusercontent.com/github-production-release-asset-2e65be/617868717/9cae7a92-1c22-4c5a-a818-1efb604e0955?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20240517%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240517T155533Z&X-Amz-Expires=300&X-Amz-Signature=54ef2db59b6a7e4c88dd900d3f1b2ca01b3b22aed86773c32bbd8550f8e4727a&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=617868717&response-content-disposition=attachment%3B%20filename%3Dllama_cpp_python-0.2.73-cp310-cp310-linux_x86_64.whl&response-content-type=application%2Foctet-stream\n", | |
"Resolving objects.githubusercontent.com (objects.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", | |
"Connecting to objects.githubusercontent.com (objects.githubusercontent.com)|185.199.108.133|:443... connected.\n", | |
"HTTP request sent, awaiting response... 200 OK\n", | |
"Length: 156645233 (149M) [application/octet-stream]\n", | |
"Saving to: ‘llama_cpp_python-0.2.73-cp310-cp310-linux_x86_64.whl’\n", | |
"\n", | |
"llama_cpp_python-0. 100%[===================>] 149.39M 49.3MB/s in 3.0s \n", | |
"\n", | |
"2024-05-17 15:55:37 (49.3 MB/s) - ‘llama_cpp_python-0.2.73-cp310-cp310-linux_x86_64.whl’ saved [156645233/156645233]\n", | |
"\n", | |
"Processing ./llama_cpp_python-0.2.73-cp310-cp310-linux_x86_64.whl\n", | |
"Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.2.73) (4.11.0)\n", | |
"Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.2.73) (1.25.2)\n", | |
"Collecting diskcache>=5.6.1 (from llama-cpp-python==0.2.73)\n", | |
" Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", | |
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | |
"\u001b[?25hRequirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.2.73) (3.1.4)\n", | |
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.11.3->llama-cpp-python==0.2.73) (2.1.5)\n", | |
"Installing collected packages: diskcache, llama-cpp-python\n", | |
"Successfully installed diskcache-5.6.3 llama-cpp-python-0.2.73\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.Javascript object>" | |
], | |
"application/javascript": [ | |
"google.colab.output.setIframeHeight(0, true, {maxHeight: 150})" | |
] | |
}, | |
"metadata": {} | |
} | |
], | |
"source": [ | |
"# @title llama-cpp-pythonのインストール(CUDA用バイナリ利用)\n", | |
"\n", | |
"# ログ表示を小さくする\n", | |
"from IPython.display import Javascript\n", | |
"display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 150})'''))\n", | |
"\n", | |
"# 現状環境 (CPython 3.10, Cuda 12.2) に対応するwheel(バイナリ)を取得してpip installする\n", | |
"!wget https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.73-cu122/llama_cpp_python-0.2.73-cp310-cp310-linux_x86_64.whl\n", | |
"!pip install ./llama_cpp_python-0.2.73-cp310-cp310-linux_x86_64.whl" | |
]
},
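{
"cell_type": "code",
"source": [
"# (Optional) Verify that the wheel installed cleanly. This is a minimal sanity check;\n",
"# it assumes the package exposes __version__, which recent llama-cpp-python releases do.\n",
"import llama_cpp\n",
"print(\"llama-cpp-python version:\", llama_cpp.__version__)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},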
{
"cell_type": "code",
"source": [
"# @title LLMモデルファイルのダウンロード\n", | |
"\n", | |
"kujira_url=\"https://huggingface.co/mmnga/DataPilot-ArrowPro-7B-KUJIRA-gguf/resolve/main/DataPilot-ArrowPro-7B-KUJIRA-Q6_K.gguf\"\n", | |
"starling_chatv_url=\"https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B-GGUF/resolve/main/japanese-starling-chatv-7b.Q5_K_M.gguf\"\n", | |
"phi3_mini_url=\"https://huggingface.co/mmnga/Phi-3-mini-128k-instruct-gguf/resolve/main/Phi-3-mini-128k-instruct-Q8_0.gguf\"\n", | |
"vecteus_url=\"https://huggingface.co/mmnga/Vecteus-v1-gguf/resolve/main/Vecteus-v1-Q8_0.gguf\"\n", | |
"\n", | |
"model_select=\"Vecteus\" # @param [\"Kujira\", \"Starling_ChatV\", \"Phi3_mini\", \"Vecteus\"]\n", | |
"if model_select==\"Kujira\":\n", | |
" url=kujira_url\n", | |
"if model_select==\"Starling_ChatV\":\n", | |
" url=starling_chatv_url\n", | |
"if model_select==\"Phi3_mini\":\n", | |
" url=phi3_mini_url\n", | |
"if model_select==\"Vecteus\":\n", | |
" url=vecteus_url\n", | |
"\n", | |
"modelfile=model_select+\".gguf\"\n", | |
"\n", | |
"# なにか別のLLMのURLを指定したい場合はこのあたりで modelfile と urlに値を入れれば良いです。\n", | |
"\n", | |
"!wget -O $modelfile $url\n", | |
"\n", | |
"print(\"ダウンロード完了:\" + modelfile);\n" | |
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Jcp4YXN_tAcy",
"outputId": "0a5ed638-f22b-4298-86f0-2bb2cb30caf7"
},
"execution_count": 60,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2024-05-17 16:27:17-- https://huggingface.co/mmnga/Vecteus-v1-gguf/resolve/main/Vecteus-v1-Q8_0.gguf\n", | |
"Resolving huggingface.co (huggingface.co)... 3.163.189.37, 3.163.189.74, 3.163.189.114, ...\n", | |
"Connecting to huggingface.co (huggingface.co)|3.163.189.37|:443... connected.\n", | |
"HTTP request sent, awaiting response... 302 Found\n", | |
"Location: https://cdn-lfs-us-1.huggingface.co/repos/d9/4c/d94c7f22b33eba3d74a63933b7ded1c79185fd7dd20f58318e3a3cb5b2e82780/e9a41eafd3618e23a9fc970a69a5f3c2b022fdce8978beabb1a5d19c86b895f1?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Vecteus-v1-Q8_0.gguf%3B+filename%3D%22Vecteus-v1-Q8_0.gguf%22%3B&Expires=1716222437&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxNjIyMjQzN319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Q5LzRjL2Q5NGM3ZjIyYjMzZWJhM2Q3NGE2MzkzM2I3ZGVkMWM3OTE4NWZkN2RkMjBmNTgzMThlM2EzY2I1YjJlODI3ODAvZTlhNDFlYWZkMzYxOGUyM2E5ZmM5NzBhNjlhNWYzYzJiMDIyZmRjZTg5NzhiZWFiYjFhNWQxOWM4NmI4OTVmMT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=izFhOv0CvniEcFObEHzZZGgfZv9RQvwDDk6nlYfXjg6WYKMvXgy-ZD2VINRV3YS976BaDbkA75NZPH7rSUjvsc6wUXGqceAcZoRLgf6o%7ESiq1evfrrPasqBxuvWzICSk68G35opBLnXOzGEvrVDiJS%7EH%7EHiTS6YtShI8TZemBzUtvbv4bSQaTfGvzik8-PJQFTgoMdCOQ9m4-GBl8%7E58XaiSnX2o3qq8Iqv9DT4%7EogBeQqql-4s1RlHlZ7meMoF9nhocvSNrkk%7E7MvV7FT68rYaHoWh3zhkDzsA-TMZeEdY3V694peO6p2ihObrEFleYS9uUTxzhEHTTnLcrXT3w1Q__&Key-Pair-Id=KCD77M1F0VK2B [following]\n", | |
"--2024-05-17 16:27:17-- https://cdn-lfs-us-1.huggingface.co/repos/d9/4c/d94c7f22b33eba3d74a63933b7ded1c79185fd7dd20f58318e3a3cb5b2e82780/e9a41eafd3618e23a9fc970a69a5f3c2b022fdce8978beabb1a5d19c86b895f1?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Vecteus-v1-Q8_0.gguf%3B+filename%3D%22Vecteus-v1-Q8_0.gguf%22%3B&Expires=1716222437&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxNjIyMjQzN319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Q5LzRjL2Q5NGM3ZjIyYjMzZWJhM2Q3NGE2MzkzM2I3ZGVkMWM3OTE4NWZkN2RkMjBmNTgzMThlM2EzY2I1YjJlODI3ODAvZTlhNDFlYWZkMzYxOGUyM2E5ZmM5NzBhNjlhNWYzYzJiMDIyZmRjZTg5NzhiZWFiYjFhNWQxOWM4NmI4OTVmMT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=izFhOv0CvniEcFObEHzZZGgfZv9RQvwDDk6nlYfXjg6WYKMvXgy-ZD2VINRV3YS976BaDbkA75NZPH7rSUjvsc6wUXGqceAcZoRLgf6o%7ESiq1evfrrPasqBxuvWzICSk68G35opBLnXOzGEvrVDiJS%7EH%7EHiTS6YtShI8TZemBzUtvbv4bSQaTfGvzik8-PJQFTgoMdCOQ9m4-GBl8%7E58XaiSnX2o3qq8Iqv9DT4%7EogBeQqql-4s1RlHlZ7meMoF9nhocvSNrkk%7E7MvV7FT68rYaHoWh3zhkDzsA-TMZeEdY3V694peO6p2ihObrEFleYS9uUTxzhEHTTnLcrXT3w1Q__&Key-Pair-Id=KCD77M1F0VK2B\n", | |
"Resolving cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)... 3.163.189.28, 3.163.189.91, 3.163.189.127, ...\n", | |
"Connecting to cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)|3.163.189.28|:443... connected.\n", | |
"HTTP request sent, awaiting response... 200 OK\n", | |
"Length: 7695857344 (7.2G) [binary/octet-stream]\n", | |
"Saving to: ‘Vecteus.gguf’\n", | |
"\n", | |
"Vecteus.gguf 100%[===================>] 7.17G 173MB/s in 42s \n", | |
"\n", | |
"2024-05-17 16:27:59 (175 MB/s) - ‘Vecteus.gguf’ saved [7695857344/7695857344]\n", | |
"\n", | |
"ダウンロード完了:Vecteus.gguf\n" | |
]
}
]
},
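{
"cell_type": "code",
"source": [
"# (Optional) Confirm the download finished and check the file size before loading the model.\n",
"# This is a minimal sketch that relies only on the modelfile variable set in the cell above.\n",
"import os\n",
"size_gb = os.path.getsize(modelfile) / (1024 ** 3)\n",
"print(f\"{modelfile}: {size_gb:.2f} GiB\")"
],
"metadata": {},
"execution_count": null,
"outputs": []
},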
{
"cell_type": "code",
"source": [
"# @title 指定モデルを利用したLlama.cppの初期化 (GPU利用)\n", | |
"print(\"loading model: \" + modelfile)\n", | |
"\n", | |
"from llama_cpp import Llama\n", | |
"llm = Llama(\n", | |
" model_path=modelfile, # path to GGUF file\n", | |
" n_threads=2, # CPU cores\n", | |
" n_batch=512, # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n", | |
" n_gpu_layers=43, # Change this value based on your model and your GPU VRAM pool.\n", | |
" n_ctx=4096, # Context window\n", | |
" verbose=False,\n", | |
")\n" | |
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FkqipX_BtFAZ",
"outputId": "ab8368bc-a10f-4e54-ed0e-c4aecc1197fc"
},
"execution_count": 40,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"loading model: Starling_ChatV.gguf\n"
]
}
]
},
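{
"cell_type": "code",
"source": [
"# (Optional) Quick one-shot test of the loaded model using the non-streaming chat API.\n",
"# This is a minimal sketch: the prompt text is arbitrary and max_tokens is just a small cap\n",
"# to keep the check fast; the chat cell below does the real work with streaming output.\n",
"test_output = llm.create_chat_completion(\n",
"    messages=[{\"role\": \"user\", \"content\": \"こんにちは。自己紹介してください。\"}],\n",
"    max_tokens=128,\n",
")\n",
"print(test_output[\"choices\"][0][\"message\"][\"content\"])"
],
"metadata": {},
"execution_count": null,
"outputs": []
},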
{
"cell_type": "code",
"source": [
"# @title チャット { vertical-output: true, form-width: \"1000px\"}\n", | |
"\n", | |
"# 長い出力行を改行するようにする\n", | |
"from IPython.display import HTML, Javascript\n", | |
"def set_css(): display(HTML('''<style>pre {white-space: pre-wrap;}</style>'''))\n", | |
"get_ipython().events.register('pre_run_cell', set_css)\n", | |
"\n", | |
"systemprompt = \"あなたは小説家です。ユーザーの指示に従います。\" #@param {type:\"string\"}\n", | |
"user_input = \"1日1回は3000kカロリー超えのドカ食いをすることでエクスタシーを感じてしまう女子社員(なぜか太らない体質)の日常の小説を書いて。なお、ドカ食いの前には非常に焦燥感を感じます。カロリーが多すぎると「至って」しまうので、おさえたいと思っているが、時々「至って」しまいます。ドカ食いはアイデンティティーであるため、ドカ食いをやめるという流れは禁止です。\" #@param {type:\"string\"}\n", | |
"\n", | |
"prompt= [\n", | |
" {\"role\": \"system\", \"content\": systemprompt},\n", | |
" {\"role\": \"user\", \"content\": user_input},\n", | |
"]\n", | |
"\n", | |
"print(\"model: \" + llm.model_path + \"\\n\")\n", | |
"\n", | |
"output = llm.create_chat_completion(\n", | |
" messages=prompt,\n", | |
" stop=[\"<</SYS>>\", \"[INST]\"],\n", | |
" stream=True\n", | |
")\n", | |
"\n", | |
"# non-streaming output\n", | |
"#print(output['choices'][0]['message']['content'])\n", | |
"\n", | |
"## streaming output\n", | |
"text = \"\"\n", | |
"for chunk in output:\n", | |
" delta = chunk['choices'][0]['delta']\n", | |
" if 'role' in delta:\n", | |
" print(delta['role'], end=': ')\n", | |
" elif 'content' in delta:\n", | |
" print(delta['content'], end='')\n", | |
" text += delta['content']\n", | |
"\n", | |
"#print(\"log: \" + text)" | |
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3WOiL0HItHDH",
"outputId": "d04f78d8-a0b3-474c-fe2a-fbe4575bafee"
},
"execution_count": 61,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"model: Starling_ChatV.gguf\n",
"\n",
" 宇宙に行く方法はいろいろあるんやけど、一般的なのはロケットに乗ることや。NASAやJAXAみたいな宇宙機関が開発してる有人宇宙船に乗って、宇宙空間を飛んで行くんや。まずは宇宙飛行士にならなあかんけど、それも相当難しいことやで。\n"
]
}
]
},
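{
"cell_type": "code",
"source": [
"# (Optional) Release the current model before loading a different one.\n",
"# A T4 has about 15 GB of VRAM, so freeing the old Llama instance first helps avoid\n",
"# running out of memory when you re-run the download and initialization cells with\n",
"# another model. This sketch relies only on Python garbage collection.\n",
"import gc\n",
"del llm\n",
"gc.collect()"
],
"metadata": {},
"execution_count": null,
"outputs": []
}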
]
}
No copyright is claimed on this notebook. Feel free to use it as you like.