Skip to content

Instantly share code, notes, and snippets.

@chottokun
Created August 4, 2024 06:54
Show Gist options
  • Save chottokun/f2f5c138c8927ac7d2ebd0afba2b75d3 to your computer and use it in GitHub Desktop.
ft_gemma-2b-it_rev2.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"machine_shape": "hm",
"gpuType": "T4",
"mount_file_id": "1-Vs3LfWAy1WNmZe-UlJYYRmzMqmr1tga",
"authorship_tag": "ABX9TyO46hE5aLGELBGldnz2o+u+",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"b39cac2a8f3b4d19821ef4248a0014df": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_c1a4d0757616437886ca1305daa96ec5",
"IPY_MODEL_5131bbb322504465bb752354ff3eec11",
"IPY_MODEL_9e7238ea25b443a18dac0d1f4eb967c0"
],
"layout": "IPY_MODEL_8db0fc2e5de7496c97ff460facc7a85d"
}
},
"c1a4d0757616437886ca1305daa96ec5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c01f9911444d4535b27de3e1ba59a27c",
"placeholder": "​",
"style": "IPY_MODEL_5b5da187608944e98ebcb26f15df848a",
"value": "Loading checkpoint shards: 100%"
}
},
"5131bbb322504465bb752354ff3eec11": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b247249437164bac8de5dad7d995de87",
"max": 2,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_04d8c3d0cfcc45d58f4c97e32945f589",
"value": 2
}
},
"9e7238ea25b443a18dac0d1f4eb967c0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_e9f4e7b46d304787b10e8f636872dfda",
"placeholder": "​",
"style": "IPY_MODEL_7865cee22e5b4ed1ab7b02e726f0e2d6",
"value": " 2/2 [00:04<00:00,  1.86s/it]"
}
},
"8db0fc2e5de7496c97ff460facc7a85d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c01f9911444d4535b27de3e1ba59a27c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5b5da187608944e98ebcb26f15df848a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b247249437164bac8de5dad7d995de87": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"04d8c3d0cfcc45d58f4c97e32945f589": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"e9f4e7b46d304787b10e8f636872dfda": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7865cee22e5b4ed1ab7b02e726f0e2d6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"80bee963c8144bd181777fdb4b74b826": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_189f0bc1948e4b90845a40762c8a3dda",
"IPY_MODEL_f0661a4ace9c47bda38dc8ea3f864121",
"IPY_MODEL_7ec9b41770e24974948485b096937e77"
],
"layout": "IPY_MODEL_49cbd413823149c9a9c98c8ae9c78be7"
}
},
"189f0bc1948e4b90845a40762c8a3dda": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_94d94454d0034eaf875a290ec35f319e",
"placeholder": "​",
"style": "IPY_MODEL_f85f0f10adf1470fa5a695163d4e09c6",
"value": "Loading checkpoint shards: 100%"
}
},
"f0661a4ace9c47bda38dc8ea3f864121": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_7e0e6619d8e64025b5c66a48fdf3902a",
"max": 2,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_2b6a85f76c91448888ded5dbcc157b15",
"value": 2
}
},
"7ec9b41770e24974948485b096937e77": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_d5d602dcabb94e04896d173e35f5c07a",
"placeholder": "​",
"style": "IPY_MODEL_ea2f90f6fbaa4e28b11f2a435984b57f",
"value": " 2/2 [00:03<00:00,  1.60s/it]"
}
},
"49cbd413823149c9a9c98c8ae9c78be7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"94d94454d0034eaf875a290ec35f319e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f85f0f10adf1470fa5a695163d4e09c6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"7e0e6619d8e64025b5c66a48fdf3902a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2b6a85f76c91448888ded5dbcc157b15": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"d5d602dcabb94e04896d173e35f5c07a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ea2f90f6fbaa4e28b11f2a435984b57f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/chottokun/f2f5c138c8927ac7d2ebd0afba2b75d3/ft_gemma-2b-it_rev2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"Gemma-2b-itをファインチューニングできるか挑戦"
],
"metadata": {
"id": "fegfmQ-4GXDd"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "M904-LBAFMre"
},
"outputs": [],
"source": [
"!pip install -q transformers datasets accelerate bitsandbytes peft trl"
]
},
{
"cell_type": "code",
"source": [
"import torch\n",
"from datasets import load_dataset\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments\n",
"from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model\n",
"from trl import SFTTrainer\n"
],
"metadata": {
"id": "PZoJt_S0FQXE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"model_name = \"google/gemma-2b-it\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"\n",
"bnb_config = BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_quant_type=\"nf4\",\n",
" bnb_4bit_compute_dtype=torch.float16,\n",
")\n",
"\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
" model_name,\n",
" quantization_config=bnb_config,\n",
" device_map=\"auto\",\n",
")\n",
"model.config.use_cache = False\n",
"model = prepare_model_for_kbit_training(model)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 118,
"referenced_widgets": [
"b39cac2a8f3b4d19821ef4248a0014df",
"c1a4d0757616437886ca1305daa96ec5",
"5131bbb322504465bb752354ff3eec11",
"9e7238ea25b443a18dac0d1f4eb967c0",
"8db0fc2e5de7496c97ff460facc7a85d",
"c01f9911444d4535b27de3e1ba59a27c",
"5b5da187608944e98ebcb26f15df848a",
"b247249437164bac8de5dad7d995de87",
"04d8c3d0cfcc45d58f4c97e32945f589",
"e9f4e7b46d304787b10e8f636872dfda",
"7865cee22e5b4ed1ab7b02e726f0e2d6"
]
},
"id": "MnOXO71cFTAv",
"outputId": "16da63d4-80b2-41af-e12a-d9e1c4fc6247"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n",
"Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use\n",
"`config.hidden_activation` if you want to override this behaviour.\n",
"See https://github.com/huggingface/transformers/pull/29402 for more details.\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "b39cac2a8f3b4d19821ef4248a0014df"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"lora_config = LoraConfig(\n",
" r=8,\n",
" lora_alpha=32,\n",
" target_modules=[\"q_proj\", \"v_proj\"],\n",
" lora_dropout=0.05,\n",
" bias=\"none\",\n",
" task_type=\"CAUSAL_LM\"\n",
")\n",
"\n",
"model = get_peft_model(model, lora_config)"
],
"metadata": {
"id": "lOTfs0xeFWg6"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# ござるを使わせていただきます。\n",
"dataset = load_dataset(\"bbz662bbz/databricks-dolly-15k-ja-gozarinnemon\", split=\"train\")\n",
"\n",
"def formatting_prompts_func(example):\n",
" output = f\"### Instruction: {example['instruction']}\\n\\n### Response: {example['output']}\"\n",
" return {'text': output}\n",
"\n",
"formatted_dataset = dataset.map(formatting_prompts_func)\n"
],
"metadata": {
"id": "fWrz2s_XF0Wv"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# 簡易データセット\n",
"# data = [\n",
"# {\"instruction\": \"日本の首都は?\", \"response\": \"日本の首都は東京です。\"},\n",
"# {\"instruction\": \"一番高い山は?\", \"response\": \"一番高い山はエベレストです。\"},\n",
"# {\"instruction\": \"今日の天気は?\", \"response\": \"今日の天気は晴れです。\"},\n",
"# ]"
],
"metadata": {
"id": "YMupDWoCfmPF"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import torch\n",
"from datasets import load_dataset\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments\n",
"from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model\n",
"from trl import SFTTrainer\n",
"\n",
"# Gemma-2b-itのロード\n",
"model_name = \"google/gemma-2b-it\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"\n",
"# 4-bit Quantizationの設定\n",
"bnb_config = BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_quant_type=\"nf4\",\n",
" bnb_4bit_compute_dtype=torch.float16,\n",
")\n",
"\n",
"# モデルのロードと準備\n",
"model = AutoModelForCausalLM.from_pretrained(\n",
" model_name,\n",
" quantization_config=bnb_config,\n",
" device_map=\"auto\",\n",
" trust_remote_code=True # Gemmaのモデルを読み込むために必要\n",
")\n",
"model.config.use_cache = False\n",
"model = prepare_model_for_kbit_training(model)\n",
"\n",
"# LoRAの設定\n",
"lora_config = LoraConfig(\n",
" r=8,\n",
" lora_alpha=32,\n",
" target_modules=[\"q_proj\", \"v_proj\"],\n",
" lora_dropout=0.05,\n",
" bias=\"none\",\n",
" task_type=\"CAUSAL_LM\"\n",
")\n",
"\n",
"# LoRAの適用\n",
"model = get_peft_model(model, lora_config)\n",
"\n",
"# # データセットのロード\n",
"# dataset = load_dataset(\"bbz662bbz/databricks-dolly-15k-ja-gozarinnemon\", split=\"train\")\n",
"\n",
"# # プロンプトのフォーマット\n",
"# def formatting_prompts_func(example):\n",
"# output = f\"### Instruction: {example['instruction']}\\n\\n### Response: {example['output']}\"\n",
"# return {'text': output}\n",
"\n",
"# formatted_dataset = dataset.map(formatting_prompts_func)\n",
"\n",
"# トークナイズ関数\n",
"def tokenize_function(examples):\n",
" return tokenizer(examples[\"text\"], truncation=True, padding=\"max_length\", max_length=512)\n",
"\n",
"# データセットのトークナイズ\n",
"tokenized_dataset = formatted_dataset.map(tokenize_function, batched=True)\n",
"\n",
"# SFTTrainerの設定\n",
"training_args = TrainingArguments(\n",
" output_dir=\"./results\",\n",
" num_train_epochs=3,\n",
" per_device_train_batch_size=4,\n",
" gradient_accumulation_steps=4,\n",
" learning_rate=2e-4,\n",
" fp16=True,\n",
" save_total_limit=3,\n",
" logging_steps=100,\n",
" save_steps=500,\n",
" push_to_hub=False,\n",
" logging_dir='./logs', # TensorBoardのログディレクトリ\n",
" # resume_from_checkpoint=\"./results\" # チェックポイントのディレクトリを指定\n",
")\n",
"\n",
"# トレーナーの初期化\n",
"trainer = SFTTrainer(\n",
" model=model,\n",
" args=training_args,\n",
" train_dataset=tokenized_dataset,\n",
" dataset_text_field=\"text\"\n",
")\n",
"\n",
"# ファインチューニングの実行\n",
"trainer.train()\n",
"\n",
"# ファインチューニング済みモデルとトークナイザーの保存\n",
"model.save_pretrained(\"./gemma-2b-finetuned\")\n",
"tokenizer.save_pretrained(\"./gemma-2b-finetuned\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000,
"referenced_widgets": [
"80bee963c8144bd181777fdb4b74b826",
"189f0bc1948e4b90845a40762c8a3dda",
"f0661a4ace9c47bda38dc8ea3f864121",
"7ec9b41770e24974948485b096937e77",
"49cbd413823149c9a9c98c8ae9c78be7",
"94d94454d0034eaf875a290ec35f319e",
"f85f0f10adf1470fa5a695163d4e09c6",
"7e0e6619d8e64025b5c66a48fdf3902a",
"2b6a85f76c91448888ded5dbcc157b15",
"d5d602dcabb94e04896d173e35f5c07a",
"ea2f90f6fbaa4e28b11f2a435984b57f"
]
},
"id": "AwYkTg_YhBp8",
"outputId": "83e6fd21-dcfc-432f-b1a9-4db20b86fa03"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "80bee963c8144bd181777fdb4b74b826"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': dataset_text_field. Will not be supported from version '1.0.0'.\n",
"\n",
"Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.\n",
" warnings.warn(message, FutureWarning)\n",
"/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:289: UserWarning: You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:318: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:408: UserWarning: You passed a tokenizer with `padding_side` not equal to `right` to the SFTTrainer. This might lead to some unexpected behaviour due to overflow issues when training a model in half-precision. You might consider adding `tokenizer.padding_side = 'right'` to your code.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='2814' max='2814' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [2814/2814 7:36:03, Epoch 2/3]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>0.822800</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>0.662900</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>0.643900</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>0.615200</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.646100</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>0.625900</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>0.616300</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>0.622500</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>0.609600</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.597700</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.600700</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1200</td>\n",
" <td>0.603200</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1300</td>\n",
" <td>0.590300</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1400</td>\n",
" <td>0.613300</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1500</td>\n",
" <td>0.571700</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1600</td>\n",
" <td>0.593100</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1700</td>\n",
" <td>0.578700</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1800</td>\n",
" <td>0.592500</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1900</td>\n",
" <td>0.576600</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2000</td>\n",
" <td>0.567300</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2100</td>\n",
" <td>0.587400</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2200</td>\n",
" <td>0.543700</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2300</td>\n",
" <td>0.567500</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2400</td>\n",
" <td>0.601100</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2500</td>\n",
" <td>0.573400</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2600</td>\n",
" <td>0.591400</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2700</td>\n",
" <td>0.572400</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2800</td>\n",
" <td>0.580100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"('./gemma-2b-finetuned/tokenizer_config.json',\n",
" './gemma-2b-finetuned/special_tokens_map.json',\n",
" './gemma-2b-finetuned/tokenizer.model',\n",
" './gemma-2b-finetuned/added_tokens.json',\n",
" './gemma-2b-finetuned/tokenizer.json')"
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"source": [
"# ファインチューニングしたモデルをロード\n",
"# model = AutoModelForCausalLM.from_pretrained(\"./gemma-2b-finetuned\")\n",
"# tokenizer = AutoTokenizer.from_pretrained(\"./gemma-2b-finetuned\")\n",
"\n",
"# テスト用のプロンプト\n",
"prompt = \"富士山と日本の首都には関係性がありますか?\"\n",
"\n",
"# トークナイズ\n",
"inputs = tokenizer(prompt, return_tensors=\"pt\").to(\"cuda\") # Move inputs to GPU\n",
"\n",
"# モデルに入力して生成\n",
"output = model.generate(inputs.input_ids, max_length=50, num_return_sequences=1)\n",
"\n",
"# 生成されたテキストをデコード\n",
"generated_text = tokenizer.decode(output[0], skip_special_tokens=True)\n",
"\n",
"print(generated_text)"
],
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3QanqqPgQ4Ub",
"outputId": "b8dd4bcb-3f43-46f2-915f-7a2850d2c13d"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.\n",
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
" warnings.warn(\n",
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:91: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
" warnings.warn(\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"富士山と日本の首都には関係性がありますか?\n",
"\n",
"日本は、富士山と東京の間に関係性がありますでござる。\n",
"\n",
"富士山は、日本の首都東京の南東に位置する山で、東京の都市圏の重要な要素\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from peft import PeftModel\n",
"\n",
"# マージ\n",
"# model = PeftModel.from_pretrained(model, \"./gemma-2b-finetuned\")\n",
"model = model.merge_and_unload()\n",
"\n",
"# マージ後のモデルを保存\n",
"model.save_pretrained(\"./gemma-2b-merged\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ra5WZm2hxs7d",
"outputId": "0ee1bea2-cc85-40c9-aedd-c1656892e58d"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/peft/tuners/lora/bnb.py:336: UserWarning: Merge lora module to 4-bit linear may get different generations due to rounding errors.\n",
" warnings.warn(\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!zip -r gemma-2b-merged.zip gemma-2b-merged\n",
"!zip -r results.zip results"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0yWq69bI4WUw",
"outputId": "8face763-44b9-4b8f-fd0e-6ebf0a5fd8be"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" adding: gemma-2b-merged/ (stored 0%)\n",
" adding: gemma-2b-merged/config.json (deflated 55%)\n",
" adding: gemma-2b-merged/model.safetensors (deflated 38%)\n",
" adding: gemma-2b-merged/generation_config.json (deflated 29%)\n",
" adding: results/ (stored 0%)\n",
" adding: results/checkpoint-2814/ (stored 0%)\n",
" adding: results/checkpoint-2814/training_args.bin (deflated 51%)\n",
" adding: results/checkpoint-2814/tokenizer_config.json (deflated 95%)\n",
" adding: results/checkpoint-2814/tokenizer.model (deflated 51%)\n",
" adding: results/checkpoint-2814/special_tokens_map.json (deflated 76%)\n",
" adding: results/checkpoint-2814/adapter_model.safetensors (deflated 8%)\n",
" adding: results/checkpoint-2814/optimizer.pt (deflated 8%)\n",
" adding: results/checkpoint-2814/scheduler.pt (deflated 56%)\n",
" adding: results/checkpoint-2814/README.md (deflated 66%)\n",
" adding: results/checkpoint-2814/trainer_state.json (deflated 72%)\n",
" adding: results/checkpoint-2814/rng_state.pth (deflated 25%)\n",
" adding: results/checkpoint-2814/adapter_config.json (deflated 51%)\n",
" adding: results/checkpoint-2814/tokenizer.json (deflated 72%)\n",
" adding: results/checkpoint-2000/ (stored 0%)\n",
" adding: results/checkpoint-2000/training_args.bin (deflated 51%)\n",
" adding: results/checkpoint-2000/tokenizer_config.json (deflated 95%)\n",
" adding: results/checkpoint-2000/tokenizer.model (deflated 51%)\n",
" adding: results/checkpoint-2000/special_tokens_map.json (deflated 76%)\n",
" adding: results/checkpoint-2000/adapter_model.safetensors (deflated 8%)\n",
" adding: results/checkpoint-2000/optimizer.pt (deflated 8%)\n",
" adding: results/checkpoint-2000/scheduler.pt (deflated 55%)\n",
" adding: results/checkpoint-2000/README.md (deflated 66%)\n",
" adding: results/checkpoint-2000/trainer_state.json (deflated 71%)\n",
" adding: results/checkpoint-2000/rng_state.pth (deflated 25%)\n",
" adding: results/checkpoint-2000/adapter_config.json (deflated 51%)\n",
" adding: results/checkpoint-2000/tokenizer.json (deflated 72%)\n",
" adding: results/checkpoint-2500/ (stored 0%)\n",
" adding: results/checkpoint-2500/training_args.bin (deflated 51%)\n",
" adding: results/checkpoint-2500/tokenizer_config.json (deflated 95%)\n",
" adding: results/checkpoint-2500/tokenizer.model (deflated 51%)\n",
" adding: results/checkpoint-2500/special_tokens_map.json (deflated 76%)\n",
" adding: results/checkpoint-2500/adapter_model.safetensors (deflated 8%)\n",
" adding: results/checkpoint-2500/optimizer.pt (deflated 8%)\n",
" adding: results/checkpoint-2500/scheduler.pt (deflated 55%)\n",
" adding: results/checkpoint-2500/README.md (deflated 66%)\n",
" adding: results/checkpoint-2500/trainer_state.json (deflated 72%)\n",
" adding: results/checkpoint-2500/rng_state.pth (deflated 25%)\n",
" adding: results/checkpoint-2500/adapter_config.json (deflated 51%)\n",
" adding: results/checkpoint-2500/tokenizer.json (deflated 72%)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [],
"metadata": {
"id": "eCx5JdtO4d9Y"
}
}
]
}