Created
August 4, 2024 06:54
-
-
Save chottokun/f2f5c138c8927ac7d2ebd0afba2b75d3 to your computer and use it in GitHub Desktop.
ft_gemma-2b-it_rev2.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"machine_shape": "hm", | |
"gpuType": "T4", | |
"mount_file_id": "1-Vs3LfWAy1WNmZe-UlJYYRmzMqmr1tga", | |
"authorship_tag": "ABX9TyO46hE5aLGELBGldnz2o+u+", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"accelerator": "GPU", | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"b39cac2a8f3b4d19821ef4248a0014df": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_c1a4d0757616437886ca1305daa96ec5", | |
"IPY_MODEL_5131bbb322504465bb752354ff3eec11", | |
"IPY_MODEL_9e7238ea25b443a18dac0d1f4eb967c0" | |
], | |
"layout": "IPY_MODEL_8db0fc2e5de7496c97ff460facc7a85d" | |
} | |
}, | |
"c1a4d0757616437886ca1305daa96ec5": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_c01f9911444d4535b27de3e1ba59a27c", | |
"placeholder": "", | |
"style": "IPY_MODEL_5b5da187608944e98ebcb26f15df848a", | |
"value": "Loading checkpoint shards: 100%" | |
} | |
}, | |
"5131bbb322504465bb752354ff3eec11": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_b247249437164bac8de5dad7d995de87", | |
"max": 2, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_04d8c3d0cfcc45d58f4c97e32945f589", | |
"value": 2 | |
} | |
}, | |
"9e7238ea25b443a18dac0d1f4eb967c0": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_e9f4e7b46d304787b10e8f636872dfda", | |
"placeholder": "", | |
"style": "IPY_MODEL_7865cee22e5b4ed1ab7b02e726f0e2d6", | |
"value": " 2/2 [00:04<00:00, 1.86s/it]" | |
} | |
}, | |
"8db0fc2e5de7496c97ff460facc7a85d": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"c01f9911444d4535b27de3e1ba59a27c": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"5b5da187608944e98ebcb26f15df848a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"b247249437164bac8de5dad7d995de87": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"04d8c3d0cfcc45d58f4c97e32945f589": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"e9f4e7b46d304787b10e8f636872dfda": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"7865cee22e5b4ed1ab7b02e726f0e2d6": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"80bee963c8144bd181777fdb4b74b826": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_189f0bc1948e4b90845a40762c8a3dda", | |
"IPY_MODEL_f0661a4ace9c47bda38dc8ea3f864121", | |
"IPY_MODEL_7ec9b41770e24974948485b096937e77" | |
], | |
"layout": "IPY_MODEL_49cbd413823149c9a9c98c8ae9c78be7" | |
} | |
}, | |
"189f0bc1948e4b90845a40762c8a3dda": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_94d94454d0034eaf875a290ec35f319e", | |
"placeholder": "", | |
"style": "IPY_MODEL_f85f0f10adf1470fa5a695163d4e09c6", | |
"value": "Loading checkpoint shards: 100%" | |
} | |
}, | |
"f0661a4ace9c47bda38dc8ea3f864121": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_7e0e6619d8e64025b5c66a48fdf3902a", | |
"max": 2, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_2b6a85f76c91448888ded5dbcc157b15", | |
"value": 2 | |
} | |
}, | |
"7ec9b41770e24974948485b096937e77": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_d5d602dcabb94e04896d173e35f5c07a", | |
"placeholder": "", | |
"style": "IPY_MODEL_ea2f90f6fbaa4e28b11f2a435984b57f", | |
"value": " 2/2 [00:03<00:00, 1.60s/it]" | |
} | |
}, | |
"49cbd413823149c9a9c98c8ae9c78be7": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"94d94454d0034eaf875a290ec35f319e": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"f85f0f10adf1470fa5a695163d4e09c6": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"7e0e6619d8e64025b5c66a48fdf3902a": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"2b6a85f76c91448888ded5dbcc157b15": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"d5d602dcabb94e04896d173e35f5c07a": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"ea2f90f6fbaa4e28b11f2a435984b57f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/chottokun/f2f5c138c8927ac7d2ebd0afba2b75d3/ft_gemma-2b-it_rev2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Gemma-2b-itをファインチューニングできるか挑戦" | |
], | |
"metadata": { | |
"id": "fegfmQ-4GXDd" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "M904-LBAFMre" | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip install -q transformers datasets accelerate bitsandbytes peft trl" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import torch\n", | |
"from datasets import load_dataset\n", | |
"from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments\n", | |
"from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model\n", | |
"from trl import SFTTrainer\n" | |
], | |
"metadata": { | |
"id": "PZoJt_S0FQXE" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"model_name = \"google/gemma-2b-it\"\n", | |
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n", | |
"tokenizer.pad_token = tokenizer.eos_token\n", | |
"\n", | |
"bnb_config = BitsAndBytesConfig(\n", | |
" load_in_4bit=True,\n", | |
" bnb_4bit_quant_type=\"nf4\",\n", | |
" bnb_4bit_compute_dtype=torch.float16,\n", | |
")\n", | |
"\n", | |
"model = AutoModelForCausalLM.from_pretrained(\n", | |
" model_name,\n", | |
" quantization_config=bnb_config,\n", | |
" device_map=\"auto\",\n", | |
")\n", | |
"model.config.use_cache = False\n", | |
"model = prepare_model_for_kbit_training(model)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 118, | |
"referenced_widgets": [ | |
"b39cac2a8f3b4d19821ef4248a0014df", | |
"c1a4d0757616437886ca1305daa96ec5", | |
"5131bbb322504465bb752354ff3eec11", | |
"9e7238ea25b443a18dac0d1f4eb967c0", | |
"8db0fc2e5de7496c97ff460facc7a85d", | |
"c01f9911444d4535b27de3e1ba59a27c", | |
"5b5da187608944e98ebcb26f15df848a", | |
"b247249437164bac8de5dad7d995de87", | |
"04d8c3d0cfcc45d58f4c97e32945f589", | |
"e9f4e7b46d304787b10e8f636872dfda", | |
"7865cee22e5b4ed1ab7b02e726f0e2d6" | |
] | |
}, | |
"id": "MnOXO71cFTAv", | |
"outputId": "16da63d4-80b2-41af-e12a-d9e1c4fc6247" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n", | |
"Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use\n", | |
"`config.hidden_activation` if you want to override this behaviour.\n", | |
"See https://github.com/huggingface/transformers/pull/29402 for more details.\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "b39cac2a8f3b4d19821ef4248a0014df" | |
} | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"lora_config = LoraConfig(\n", | |
" r=8,\n", | |
" lora_alpha=32,\n", | |
" target_modules=[\"q_proj\", \"v_proj\"],\n", | |
" lora_dropout=0.05,\n", | |
" bias=\"none\",\n", | |
" task_type=\"CAUSAL_LM\"\n", | |
")\n", | |
"\n", | |
"model = get_peft_model(model, lora_config)" | |
], | |
"metadata": { | |
"id": "lOTfs0xeFWg6" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# ござるデータセット(databricks-dolly-15k-ja-gozarinnemon)を使わせていただきます。\n", | |
"dataset = load_dataset(\"bbz662bbz/databricks-dolly-15k-ja-gozarinnemon\", split=\"train\")\n", | |
"\n", | |
"def formatting_prompts_func(example):\n", | |
" output = f\"### Instruction: {example['instruction']}\\n\\n### Response: {example['output']}\"\n", | |
" return {'text': output}\n", | |
"\n", | |
"formatted_dataset = dataset.map(formatting_prompts_func)\n" | |
], | |
"metadata": { | |
"id": "fWrz2s_XF0Wv" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# 簡易データセット\n", | |
"# data = [\n", | |
"# {\"instruction\": \"日本の首都は?\", \"response\": \"日本の首都は東京です。\"},\n", | |
"# {\"instruction\": \"一番高い山は?\", \"response\": \"一番高い山はエベレストです。\"},\n", | |
"# {\"instruction\": \"今日の天気は?\", \"response\": \"今日の天気は晴れです。\"},\n", | |
"# ]" | |
], | |
"metadata": { | |
"id": "YMupDWoCfmPF" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import torch\n", | |
"from datasets import load_dataset\n", | |
"from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments\n", | |
"from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model\n", | |
"from trl import SFTTrainer\n", | |
"\n", | |
"# Gemma-2b-itのロード\n", | |
"model_name = \"google/gemma-2b-it\"\n", | |
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n", | |
"tokenizer.pad_token = tokenizer.eos_token\n", | |
"\n", | |
"# 4-bit Quantizationの設定\n", | |
"bnb_config = BitsAndBytesConfig(\n", | |
" load_in_4bit=True,\n", | |
" bnb_4bit_quant_type=\"nf4\",\n", | |
" bnb_4bit_compute_dtype=torch.float16,\n", | |
")\n", | |
"\n", | |
"# モデルのロードと準備\n", | |
"model = AutoModelForCausalLM.from_pretrained(\n", | |
" model_name,\n", | |
" quantization_config=bnb_config,\n", | |
" device_map=\"auto\",\n", | |
" trust_remote_code=True # Gemmaはtransformersが標準対応しているため本来は不要(前のセルでは指定なしでロードできている)\n", | |
")\n", | |
"model.config.use_cache = False\n", | |
"model = prepare_model_for_kbit_training(model)\n", | |
"\n", | |
"# LoRAの設定\n", | |
"lora_config = LoraConfig(\n", | |
" r=8,\n", | |
" lora_alpha=32,\n", | |
" target_modules=[\"q_proj\", \"v_proj\"],\n", | |
" lora_dropout=0.05,\n", | |
" bias=\"none\",\n", | |
" task_type=\"CAUSAL_LM\"\n", | |
")\n", | |
"\n", | |
"# LoRAの適用\n", | |
"model = get_peft_model(model, lora_config)\n", | |
"\n", | |
"# # データセットのロード\n", | |
"# dataset = load_dataset(\"bbz662bbz/databricks-dolly-15k-ja-gozarinnemon\", split=\"train\")\n", | |
"\n", | |
"# # プロンプトのフォーマット\n", | |
"# def formatting_prompts_func(example):\n", | |
"# output = f\"### Instruction: {example['instruction']}\\n\\n### Response: {example['output']}\"\n", | |
"# return {'text': output}\n", | |
"\n", | |
"# formatted_dataset = dataset.map(formatting_prompts_func)\n", | |
"\n", | |
"# トークナイズ関数\n", | |
"def tokenize_function(examples):\n", | |
" return tokenizer(examples[\"text\"], truncation=True, padding=\"max_length\", max_length=512)\n", | |
"\n", | |
"# データセットのトークナイズ\n", | |
"tokenized_dataset = formatted_dataset.map(tokenize_function, batched=True)\n", | |
"\n", | |
"# SFTTrainerの設定\n", | |
"training_args = TrainingArguments(\n", | |
" output_dir=\"./results\",\n", | |
" num_train_epochs=3,\n", | |
" per_device_train_batch_size=4,\n", | |
" gradient_accumulation_steps=4,\n", | |
" learning_rate=2e-4,\n", | |
" fp16=True,\n", | |
" save_total_limit=3,\n", | |
" logging_steps=100,\n", | |
" save_steps=500,\n", | |
" push_to_hub=False,\n", | |
" logging_dir='./logs', # TensorBoardのログディレクトリ\n", | |
" # resume_from_checkpoint=\"./results\" # チェックポイントのディレクトリを指定\n", | |
")\n", | |
"\n", | |
"# トレーナーの初期化\n", | |
"trainer = SFTTrainer(\n", | |
" model=model,\n", | |
" args=training_args,\n", | |
" train_dataset=tokenized_dataset,\n", | |
" dataset_text_field=\"text\"\n", | |
")\n", | |
"\n", | |
"# ファインチューニングの実行\n", | |
"trainer.train()\n", | |
"\n", | |
"# ファインチューニング済みモデルとトークナイザーの保存\n", | |
"model.save_pretrained(\"./gemma-2b-finetuned\")\n", | |
"tokenizer.save_pretrained(\"./gemma-2b-finetuned\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 1000, | |
"referenced_widgets": [ | |
"80bee963c8144bd181777fdb4b74b826", | |
"189f0bc1948e4b90845a40762c8a3dda", | |
"f0661a4ace9c47bda38dc8ea3f864121", | |
"7ec9b41770e24974948485b096937e77", | |
"49cbd413823149c9a9c98c8ae9c78be7", | |
"94d94454d0034eaf875a290ec35f319e", | |
"f85f0f10adf1470fa5a695163d4e09c6", | |
"7e0e6619d8e64025b5c66a48fdf3902a", | |
"2b6a85f76c91448888ded5dbcc157b15", | |
"d5d602dcabb94e04896d173e35f5c07a", | |
"ea2f90f6fbaa4e28b11f2a435984b57f" | |
] | |
}, | |
"id": "AwYkTg_YhBp8", | |
"outputId": "83e6fd21-dcfc-432f-b1a9-4db20b86fa03" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "80bee963c8144bd181777fdb4b74b826" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': dataset_text_field. Will not be supported from version '1.0.0'.\n", | |
"\n", | |
"Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.\n", | |
" warnings.warn(message, FutureWarning)\n", | |
"/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:289: UserWarning: You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024\n", | |
" warnings.warn(\n", | |
"/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:318: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.\n", | |
" warnings.warn(\n", | |
"/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:408: UserWarning: You passed a tokenizer with `padding_side` not equal to `right` to the SFTTrainer. This might lead to some unexpected behaviour due to overflow issues when training a model in half-precision. You might consider adding `tokenizer.padding_side = 'right'` to your code.\n", | |
" warnings.warn(\n", | |
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n", | |
" warnings.warn(\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
], | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" \n", | |
" <progress value='2814' max='2814' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" [2814/2814 7:36:03, Epoch 2/3]\n", | |
" </div>\n", | |
" <table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>Step</th>\n", | |
" <th>Training Loss</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>100</td>\n", | |
" <td>0.822800</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>200</td>\n", | |
" <td>0.662900</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>300</td>\n", | |
" <td>0.643900</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>400</td>\n", | |
" <td>0.615200</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>500</td>\n", | |
" <td>0.646100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>600</td>\n", | |
" <td>0.625900</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>700</td>\n", | |
" <td>0.616300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>800</td>\n", | |
" <td>0.622500</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>900</td>\n", | |
" <td>0.609600</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1000</td>\n", | |
" <td>0.597700</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1100</td>\n", | |
" <td>0.600700</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1200</td>\n", | |
" <td>0.603200</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1300</td>\n", | |
" <td>0.590300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1400</td>\n", | |
" <td>0.613300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1500</td>\n", | |
" <td>0.571700</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1600</td>\n", | |
" <td>0.593100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1700</td>\n", | |
" <td>0.578700</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1800</td>\n", | |
" <td>0.592500</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1900</td>\n", | |
" <td>0.576600</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2000</td>\n", | |
" <td>0.567300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2100</td>\n", | |
" <td>0.587400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2200</td>\n", | |
" <td>0.543700</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2300</td>\n", | |
" <td>0.567500</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2400</td>\n", | |
" <td>0.601100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2500</td>\n", | |
" <td>0.573400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2600</td>\n", | |
" <td>0.591400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2700</td>\n", | |
" <td>0.572400</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2800</td>\n", | |
" <td>0.580100</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table><p>" | |
] | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n", | |
" warnings.warn(\n", | |
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n", | |
" warnings.warn(\n", | |
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n", | |
" warnings.warn(\n", | |
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n", | |
" warnings.warn(\n", | |
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n", | |
" warnings.warn(\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"('./gemma-2b-finetuned/tokenizer_config.json',\n", | |
" './gemma-2b-finetuned/special_tokens_map.json',\n", | |
" './gemma-2b-finetuned/tokenizer.model',\n", | |
" './gemma-2b-finetuned/added_tokens.json',\n", | |
" './gemma-2b-finetuned/tokenizer.json')" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"source": [ | |
"# Optionally reload the fine-tuned model and tokenizer from disk.\n", | |
"# model = AutoModelForCausalLM.from_pretrained(\"./gemma-2b-finetuned\")\n", | |
"# tokenizer = AutoTokenizer.from_pretrained(\"./gemma-2b-finetuned\")\n", | |
"\n", | |
"# Switch to inference mode. While the model is still in training mode,\n", | |
"# gradient checkpointing stays active during generation and forces\n", | |
"# `use_cache=False` (see the stderr recorded for this cell).\n", | |
"model.eval()\n", | |
"\n", | |
"# Test prompt\n", | |
"prompt = \"富士山と日本の首都には関係性がありますか?\"\n", | |
"\n", | |
"# Tokenize and move the tensors to the device the model lives on\n", | |
"inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n", | |
"\n", | |
"# Generate. `**inputs` forwards the attention mask together with the input ids;\n", | |
"# `max_new_tokens` bounds only the continuation, whereas `max_length` also\n", | |
"# counted the prompt tokens against the 50-token budget.\n", | |
"output = model.generate(**inputs, max_new_tokens=50, num_return_sequences=1)\n", | |
"\n", | |
"# Decode the generated ids back to text\n", | |
"generated_text = tokenizer.decode(output[0], skip_special_tokens=True)\n", | |
"\n", | |
"print(generated_text)" | |
], | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "3QanqqPgQ4Ub", | |
"outputId": "b8dd4bcb-3f43-46f2-915f-7a2850d2c13d" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.\n", | |
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n", | |
" warnings.warn(\n", | |
"/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:91: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n", | |
" warnings.warn(\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"富士山と日本の首都には関係性がありますか?\n", | |
"\n", | |
"日本は、富士山と東京の間に関係性がありますでござる。\n", | |
"\n", | |
"富士山は、日本の首都東京の南東に位置する山で、東京の都市圏の重要な要素\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from peft import PeftModel\n", | |
"\n", | |
"# Fold the LoRA adapter weights into the base model and drop the PEFT wrappers.\n", | |
"# NOTE(review): the warning recorded for this cell indicates the base layers are\n", | |
"# 4-bit, so the merged model may generate slightly differently from the\n", | |
"# adapter-on-top model (rounding). Confirm whether merging into a\n", | |
"# higher-precision copy of the base model is required.\n", | |
"# model = PeftModel.from_pretrained(model, \"./gemma-2b-finetuned\")\n", | |
"model = model.merge_and_unload()\n", | |
"\n", | |
"# Save the merged model together with its tokenizer so that\n", | |
"# ./gemma-2b-merged can be loaded on its own.\n", | |
"model.save_pretrained(\"./gemma-2b-merged\")\n", | |
"tokenizer.save_pretrained(\"./gemma-2b-merged\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Ra5WZm2hxs7d", | |
"outputId": "0ee1bea2-cc85-40c9-aedd-c1656892e58d" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"/usr/local/lib/python3.10/dist-packages/peft/tuners/lora/bnb.py:336: UserWarning: Merge lora module to 4-bit linear may get different generations due to rounding errors.\n", | |
" warnings.warn(\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Archive the merged model directory and the training checkpoints.\n", | |
"!zip --recurse-paths gemma-2b-merged.zip gemma-2b-merged\n", | |
"!zip --recurse-paths results.zip results" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "0yWq69bI4WUw", | |
"outputId": "8face763-44b9-4b8f-fd0e-6ebf0a5fd8be" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
" adding: gemma-2b-merged/ (stored 0%)\n", | |
" adding: gemma-2b-merged/config.json (deflated 55%)\n", | |
" adding: gemma-2b-merged/model.safetensors (deflated 38%)\n", | |
" adding: gemma-2b-merged/generation_config.json (deflated 29%)\n", | |
" adding: results/ (stored 0%)\n", | |
" adding: results/checkpoint-2814/ (stored 0%)\n", | |
" adding: results/checkpoint-2814/training_args.bin (deflated 51%)\n", | |
" adding: results/checkpoint-2814/tokenizer_config.json (deflated 95%)\n", | |
" adding: results/checkpoint-2814/tokenizer.model (deflated 51%)\n", | |
" adding: results/checkpoint-2814/special_tokens_map.json (deflated 76%)\n", | |
" adding: results/checkpoint-2814/adapter_model.safetensors (deflated 8%)\n", | |
" adding: results/checkpoint-2814/optimizer.pt (deflated 8%)\n", | |
" adding: results/checkpoint-2814/scheduler.pt (deflated 56%)\n", | |
" adding: results/checkpoint-2814/README.md (deflated 66%)\n", | |
" adding: results/checkpoint-2814/trainer_state.json (deflated 72%)\n", | |
" adding: results/checkpoint-2814/rng_state.pth (deflated 25%)\n", | |
" adding: results/checkpoint-2814/adapter_config.json (deflated 51%)\n", | |
" adding: results/checkpoint-2814/tokenizer.json (deflated 72%)\n", | |
" adding: results/checkpoint-2000/ (stored 0%)\n", | |
" adding: results/checkpoint-2000/training_args.bin (deflated 51%)\n", | |
" adding: results/checkpoint-2000/tokenizer_config.json (deflated 95%)\n", | |
" adding: results/checkpoint-2000/tokenizer.model (deflated 51%)\n", | |
" adding: results/checkpoint-2000/special_tokens_map.json (deflated 76%)\n", | |
" adding: results/checkpoint-2000/adapter_model.safetensors (deflated 8%)\n", | |
" adding: results/checkpoint-2000/optimizer.pt (deflated 8%)\n", | |
" adding: results/checkpoint-2000/scheduler.pt (deflated 55%)\n", | |
" adding: results/checkpoint-2000/README.md (deflated 66%)\n", | |
" adding: results/checkpoint-2000/trainer_state.json (deflated 71%)\n", | |
" adding: results/checkpoint-2000/rng_state.pth (deflated 25%)\n", | |
" adding: results/checkpoint-2000/adapter_config.json (deflated 51%)\n", | |
" adding: results/checkpoint-2000/tokenizer.json (deflated 72%)\n", | |
" adding: results/checkpoint-2500/ (stored 0%)\n", | |
" adding: results/checkpoint-2500/training_args.bin (deflated 51%)\n", | |
" adding: results/checkpoint-2500/tokenizer_config.json (deflated 95%)\n", | |
" adding: results/checkpoint-2500/tokenizer.model (deflated 51%)\n", | |
" adding: results/checkpoint-2500/special_tokens_map.json (deflated 76%)\n", | |
" adding: results/checkpoint-2500/adapter_model.safetensors (deflated 8%)\n", | |
" adding: results/checkpoint-2500/optimizer.pt (deflated 8%)\n", | |
" adding: results/checkpoint-2500/scheduler.pt (deflated 55%)\n", | |
" adding: results/checkpoint-2500/README.md (deflated 66%)\n", | |
" adding: results/checkpoint-2500/trainer_state.json (deflated 72%)\n", | |
" adding: results/checkpoint-2500/rng_state.pth (deflated 25%)\n", | |
" adding: results/checkpoint-2500/adapter_config.json (deflated 51%)\n", | |
" adding: results/checkpoint-2500/tokenizer.json (deflated 72%)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [], | |
"metadata": { | |
"id": "eCx5JdtO4d9Y" | |
} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment