Skip to content

Instantly share code, notes, and snippets.

@sandeshrajbhandari
Created July 18, 2023 13:40
Show Gist options
  • Save sandeshrajbhandari/342e4f800fa34c7553987376363bd6bf to your computer and use it in GitHub Desktop.
Save sandeshrajbhandari/342e4f800fa34c7553987376363bd6bf to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"6670200b8d9e431484f5486451962ccc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_d35938bfcddb4f7690d3595c0116018e",
"IPY_MODEL_1df5d1c44fca46a4a86007703e960d5b",
"IPY_MODEL_1d2b2145ec7543078612ed17f2327e4b"
],
"layout": "IPY_MODEL_eb3a17b492e6494eb7892e718a539a48"
}
},
"d35938bfcddb4f7690d3595c0116018e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_7252698397c44dbca7a8b640fa953e4d",
"placeholder": "​",
"style": "IPY_MODEL_0e799cdd8ceb4768838856e2b1cdb873",
"value": "100%"
}
},
"1df5d1c44fca46a4a86007703e960d5b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cf411322ca4745c397b6cef20fb3daf3",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_24d0dfaddbff43fda260890b6e098b02",
"value": 1
}
},
"1d2b2145ec7543078612ed17f2327e4b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ee7187895fca492fac63ddf62744260a",
"placeholder": "​",
"style": "IPY_MODEL_d2488c580d1b435f8bb7e32c6e7f8421",
"value": " 1/1 [00:00<00:00, 20.02it/s]"
}
},
"eb3a17b492e6494eb7892e718a539a48": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7252698397c44dbca7a8b640fa953e4d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0e799cdd8ceb4768838856e2b1cdb873": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cf411322ca4745c397b6cef20fb3daf3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"24d0dfaddbff43fda260890b6e098b02": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"ee7187895fca492fac63ddf62744260a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d2488c580d1b435f8bb7e32c6e7f8421": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ef7d4b96c3824dc48201a25992e73d6f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "VBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "VBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "VBoxView",
"box_style": "",
"children": [
"IPY_MODEL_f4ccb465260d4cfda8407a4b95ef08f8",
"IPY_MODEL_1e9bd33176a84029b25427187d97ad2e",
"IPY_MODEL_86b5bc4a763a46878c7bcbd3a7619194",
"IPY_MODEL_05776f363c5f48868c26ac747cc751ad"
],
"layout": "IPY_MODEL_b63fc334e33b47e9bcea71391a417d5f"
}
},
"31e48c106de148c381865fd2b186d5a8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b8d66adf36fb461ca81adadba204b701",
"placeholder": "​",
"style": "IPY_MODEL_106fe6ea19cc4f458b567a56a838aba6",
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
}
},
"0a13fef39f1e4da0a890d6c6a70c6e66": {
"model_module": "@jupyter-widgets/controls",
"model_name": "PasswordModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "PasswordModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "PasswordView",
"continuous_update": true,
"description": "Token:",
"description_tooltip": null,
"disabled": false,
"layout": "IPY_MODEL_07cfc6f9367a4e659a04c3898d3cfe3d",
"placeholder": "​",
"style": "IPY_MODEL_c6c2a1e6c21048248156c0797a0e207d",
"value": ""
}
},
"55bb8db8a85142f28527316d8ab3560d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "CheckboxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "CheckboxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "CheckboxView",
"description": "Add token as git credential?",
"description_tooltip": null,
"disabled": false,
"indent": true,
"layout": "IPY_MODEL_7f59c58d2efc44efb10e79d768d117a8",
"style": "IPY_MODEL_1bd3a7360caf4a1587a7357117dbb4e8",
"value": true
}
},
"82bf72b8a2a044d189f99611e4aca0cb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Login",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_9e202b617aac4488a312ba3954aceb3e",
"style": "IPY_MODEL_18118d78e7f44c28ae16a67b9157a5cf",
"tooltip": ""
}
},
"e3c591122f0d49d3a5d96f71c9522ad3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8d488464581e43e89bd1d55a9c0faa2d",
"placeholder": "​",
"style": "IPY_MODEL_0bd9c2f46c3b49c188dafa88011ce82e",
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
}
},
"b63fc334e33b47e9bcea71391a417d5f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": "center",
"align_self": null,
"border": null,
"bottom": null,
"display": "flex",
"flex": null,
"flex_flow": "column",
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": "50%"
}
},
"b8d66adf36fb461ca81adadba204b701": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"106fe6ea19cc4f458b567a56a838aba6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"07cfc6f9367a4e659a04c3898d3cfe3d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c6c2a1e6c21048248156c0797a0e207d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"7f59c58d2efc44efb10e79d768d117a8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1bd3a7360caf4a1587a7357117dbb4e8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9e202b617aac4488a312ba3954aceb3e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"18118d78e7f44c28ae16a67b9157a5cf": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"8d488464581e43e89bd1d55a9c0faa2d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0bd9c2f46c3b49c188dafa88011ce82e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"500e6285eed3412ab158e6450b092e57": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_dbe68530bacb4bb9a096fbd4fd4206b0",
"placeholder": "​",
"style": "IPY_MODEL_357347403bcd4864b91cdcb8e0b50e8a",
"value": "Connecting..."
}
},
"dbe68530bacb4bb9a096fbd4fd4206b0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"357347403bcd4864b91cdcb8e0b50e8a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f4ccb465260d4cfda8407a4b95ef08f8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_72cb74a140d041318dd9c6d7c0392fbd",
"placeholder": "​",
"style": "IPY_MODEL_698814859e544b6da8c0bd2b447d7f6e",
"value": "Token is valid (permission: write)."
}
},
"1e9bd33176a84029b25427187d97ad2e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8b2419167ca2417dacc03172e619d534",
"placeholder": "​",
"style": "IPY_MODEL_495a030ad8b8478da0406e8627dd3f29",
"value": "Your token has been saved in your configured git credential helpers (store)."
}
},
"86b5bc4a763a46878c7bcbd3a7619194": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_bbab4eff8fad443f86c3bfc438242fe4",
"placeholder": "​",
"style": "IPY_MODEL_2dd521042b6c4cff96b08aa4d56e6c02",
"value": "Your token has been saved to /root/.cache/huggingface/token"
}
},
"05776f363c5f48868c26ac747cc751ad": {
"model_module": "@jupyter-widgets/controls",
"model_name": "LabelModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "LabelModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "LabelView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8f159e22b6094165ac5adefc4db835e1",
"placeholder": "​",
"style": "IPY_MODEL_8a5e551ea12a4a96bd84c6da5c458c31",
"value": "Login successful"
}
},
"72cb74a140d041318dd9c6d7c0392fbd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"698814859e544b6da8c0bd2b447d7f6e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"8b2419167ca2417dacc03172e619d534": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"495a030ad8b8478da0406e8627dd3f29": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"bbab4eff8fad443f86c3bfc438242fe4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2dd521042b6c4cff96b08aa4d56e6c02": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"8f159e22b6094165ac5adefc4db835e1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8a5e551ea12a4a96bd84c6da5c458c31": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"bef02a983ef74b8daa9e27c1d38c4b1d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_8e063db56b2245eaaa9ade443b22e2c0",
"IPY_MODEL_869c3e33ae3c4a85969608744ecdd72e",
"IPY_MODEL_1ea68c1943724f4394d66e78bb4c0eb4"
],
"layout": "IPY_MODEL_345a09601db24c4983724472cddcf4af"
}
},
"8e063db56b2245eaaa9ade443b22e2c0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3c250f255efb488bbf8a596b55911034",
"placeholder": "​",
"style": "IPY_MODEL_aebb8687eeed479e899a4d10624f09d6",
"value": "pytorch_model.bin: 100%"
}
},
"869c3e33ae3c4a85969608744ecdd72e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8c3a7593be364dfa813ca99e474c5965",
"max": 1621221821,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_6d98941b8a9344bb82a1e786c9418bce",
"value": 1621221821
}
},
"1ea68c1943724f4394d66e78bb4c0eb4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_48d6d1c14ef944a68c2462bd16bd2d63",
"placeholder": "​",
"style": "IPY_MODEL_3a0d5f4a484941248dcf56009413df89",
"value": " 1.62G/1.62G [02:27&lt;00:00, 9.40MB/s]"
}
},
"345a09601db24c4983724472cddcf4af": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3c250f255efb488bbf8a596b55911034": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"aebb8687eeed479e899a4d10624f09d6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"8c3a7593be364dfa813ca99e474c5965": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6d98941b8a9344bb82a1e786c9418bce": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"48d6d1c14ef944a68c2462bd16bd2d63": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3a0d5f4a484941248dcf56009413df89": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"https://github.com/dredwardhyde/gpt-neo-fine-tuning-example/blob/main/gpt_neo.py"
],
"metadata": {
"id": "CaGAqr0vhNmp"
}
},
{
"cell_type": "code",
"source": [
"!pip install -q bitsandbytes datasets accelerate loralib\n",
"!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "i5s-BDY9hHvp",
"outputId": "ecf8b901-5b24-4bc9-d9f9-b61017f45673"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.5/92.5 MB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m486.2/486.2 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m244.2/244.2 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.5/212.5 kB\u001b[0m \u001b[31m19.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m23.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m64.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m54.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Building wheel for peft (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "ff-vIrx5hGEp"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import torch\n",
"from torch.utils.data import Dataset, random_split\n",
"from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForCausalLM, IntervalStrategy"
]
},
{
"cell_type": "code",
"source": [
"!wget https://github.com/dredwardhyde/gpt-neo-fine-tuning-example/raw/main/netflix_titles.csv"
],
"metadata": {
"id": "DM57O6hainx_",
"outputId": "cd702a19-f0be-468b-9b10-aa94891e0621",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2023-07-18 11:53:53-- https://github.com/dredwardhyde/gpt-neo-fine-tuning-example/raw/main/netflix_titles.csv\n",
"Resolving github.com (github.com)... 20.205.243.166\n",
"Connecting to github.com (github.com)|20.205.243.166|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://raw.githubusercontent.com/dredwardhyde/gpt-neo-fine-tuning-example/main/netflix_titles.csv [following]\n",
"--2023-07-18 11:53:54-- https://raw.githubusercontent.com/dredwardhyde/gpt-neo-fine-tuning-example/main/netflix_titles.csv\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 2992704 (2.9M) [text/plain]\n",
"Saving to: ‘netflix_titles.csv’\n",
"\n",
"netflix_titles.csv 100%[===================>] 2.85M --.-KB/s in 0.01s \n",
"\n",
"2023-07-18 11:53:55 (196 MB/s) - ‘netflix_titles.csv’ saved [2992704/2992704]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"! wget https://github.com/tloen/alpaca-lora/raw/main/alpaca_data.json"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7fwi8UjP329L",
"outputId": "28f67c7c-a913-484b-d167-fcfb0364fc9a"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2023-07-18 11:53:55-- https://github.com/tloen/alpaca-lora/raw/main/alpaca_data.json\n",
"Resolving github.com (github.com)... 20.205.243.166\n",
"Connecting to github.com (github.com)|20.205.243.166|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://raw.githubusercontent.com/tloen/alpaca-lora/main/alpaca_data.json [following]\n",
"--2023-07-18 11:53:55-- https://raw.githubusercontent.com/tloen/alpaca-lora/main/alpaca_data.json\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 22773992 (22M) [text/plain]\n",
"Saving to: ‘alpaca_data.json’\n",
"\n",
"alpaca_data.json 100%[===================>] 21.72M --.-KB/s in 0.08s \n",
"\n",
"2023-07-18 11:53:57 (256 MB/s) - ‘alpaca_data.json’ saved [22773992/22773992]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"torch.cuda.empty_cache()"
],
"metadata": {
"id": "D7B9yIAE9p97"
},
"execution_count": 12,
"outputs": []
},
{
"cell_type": "code",
"source": [
"torch.manual_seed(42)\n",
"# tokenizer = AutoTokenizer.from_pretrained(\"EleutherAI/gpt-neo-125m\", bos_token='<|startoftext|>',\n",
"# eos_token='<|endoftext|>', pad_token='<|pad|>')\n",
"tokenizer = AutoTokenizer.from_pretrained(\"EleutherAI/pythia-410m-deduped-v0\")\n",
"tokenizer.pad_token = tokenizer.eos_token\n",
"## don't change bos_token, eos_token and pad_token.\n",
"## set pad_token as eos_token as done in many tutorials\n",
"\n",
"model = AutoModelForCausalLM.from_pretrained(\"EleutherAI/pythia-410m-deduped-v0\").cuda()\n",
"\n",
"model.resize_token_embeddings(len(tokenizer))\n",
"\n",
"descriptions = pd.read_csv('/content/netflix_titles.csv')['description']\n",
"\n",
"max_length = max([len(tokenizer.encode(description)) for description in descriptions])\n",
"# alpacadata['train']['prompt']\n",
"\n",
"print(\"Max length: {}\".format(max_length))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "d8XMZt0DiCwn",
"outputId": "f8f5d1b0-1dfa-4a1e-c0e2-c12da1f750d9"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Max length: 64\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from datasets import load_dataset\n",
"data = load_dataset(\"json\", data_files=\"alpaca_data.json\")"
],
"metadata": {
"id": "HaU2MYfJy2fs",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 67,
"referenced_widgets": [
"6670200b8d9e431484f5486451962ccc",
"d35938bfcddb4f7690d3595c0116018e",
"1df5d1c44fca46a4a86007703e960d5b",
"1d2b2145ec7543078612ed17f2327e4b",
"eb3a17b492e6494eb7892e718a539a48",
"7252698397c44dbca7a8b640fa953e4d",
"0e799cdd8ceb4768838856e2b1cdb873",
"cf411322ca4745c397b6cef20fb3daf3",
"24d0dfaddbff43fda260890b6e098b02",
"ee7187895fca492fac63ddf62744260a",
"d2488c580d1b435f8bb7e32c6e7f8421"
]
},
"outputId": "4177615d-e3ee-473f-b27d-50dbac9ff2ab"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:datasets.builder:Found cached dataset json (/root/.cache/huggingface/datasets/json/default-9dd85b60ab296bf1/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "6670200b8d9e431484f5486451962ccc"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"def generate_prompt(data_point):\n",
" # sorry about the formatting disaster gotta move fast\n",
" if data_point[\"instruction\"]:\n",
" return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
"\n",
"### Instruction:\n",
"{data_point[\"instruction\"]}\n",
"\n",
"### Input:\n",
"{data_point[\"input\"]}\n",
"\n",
"### Response:\n",
"{data_point[\"output\"]}\"\"\"\n",
" else:\n",
" return f\"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
"\n",
"### Instruction:\n",
"{data_point[\"instruction\"]}\n",
"\n",
"### Response:\n",
"{data_point[\"output\"]}\"\"\"\n",
"\n",
"\n",
"alpacadata = data.map(lambda data_point: {\"prompt\": (generate_prompt(data_point))})"
],
"metadata": {
"id": "hIzxe0K44rtK",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a6663fa3-0cc4-4bbd-f8ad-6d4d96209212"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/json/default-9dd85b60ab296bf1/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-87f63d089e856476.arrow\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"alpacadata['train']['prompt'][0]\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
},
"id": "gLkiRmWJ6p96",
"outputId": "d4043057-d3a6-41e9-c494-29c7977cb7f4"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nGive three tips for staying healthy.\\n\\n### Input:\\n\\n\\n### Response:\\n1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \\n2. Exercise regularly to keep your body active and strong. \\n3. Get enough sleep and maintain a consistent sleep schedule.'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 35
}
]
},
{
"cell_type": "code",
"source": [
"data\n",
"train_dataset=data[\"train\"]\n",
"train_dataset"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LonoOiNI5d07",
"outputId": "aa5ac4c8-c8be-4d6b-e6b4-e568f4fe8707"
},
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['instruction', 'output', 'input'],\n",
" num_rows: 52002\n",
"})"
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"source": [
"len(descriptions[:1000])"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lDvlPqSkpfYH",
"outputId": "3e899482-142b-4b14-c1e6-f5815f3a4ec5"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"1000"
]
},
"metadata": {},
"execution_count": 57
}
]
},
{
"cell_type": "code",
"source": [
"tokenizer.eos_token, tokenizer.pad_token, tokenizer.bos_token,"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pEvMpqXEwPCc",
"outputId": "d1393083-3638-4624-dd69-e582cdb7f892"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"('<|endoftext|>', '<|endoftext|>', '<|endoftext|>')"
]
},
"metadata": {},
"execution_count": 42
}
]
},
{
"cell_type": "code",
"source": [
"tokenizer??"
],
"metadata": {
"id": "EVGJdfZQxQuc"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"tokenizer.decode(dataset[0][0], skip_special_tokens=True)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 70
},
"id": "xb9tDMtQv6Tl",
"outputId": "ed9b3cae-4376-4082-c332-abbaddde725b"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nGive three tips for staying healthy.\\n\\n### Input:\\n\\n\\n### Response:\\n1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \\n2. Exercise regularly to keep your body active and strong. \\n3. Get enough sleep and maintain a consistent sleep schedule.'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 49
}
]
},
{
"cell_type": "code",
"source": [
"alpacaPrompts = alpacadata['train']['prompt'][:10000] ## only 1000 sets\n",
"# (alpacaPrompts[:1000][0])\n",
"max_length = max([len(tokenizer.encode(alpacaPrompt)) for alpacaPrompt in alpacaPrompts])\n",
"max_length"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3JOM9V457mRK",
"outputId": "4a15ca17-ec5b-4e5d-a0d9-6d423653d6d4"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"1034"
]
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"class NetflixDataset(Dataset):\n",
" def __init__(self, txt_list, tokenizer, max_length):\n",
" self.input_ids = []\n",
" self.attn_masks = []\n",
" self.labels = []\n",
" for txt in txt_list:\n",
" encodings_dict = tokenizer('<|endoftext|>' + txt + '<|endoftext|>', truncation=True,\n",
" max_length=max_length, padding=\"max_length\")\n",
" self.input_ids.append(torch.tensor(encodings_dict['input_ids']))\n",
" self.attn_masks.append(torch.tensor(encodings_dict['attention_mask']))\n",
"\n",
" def __len__(self):\n",
" return len(self.input_ids)\n",
"\n",
" def __getitem__(self, idx):\n",
" return self.input_ids[idx], self.attn_masks[idx]\n",
"\n",
"\n",
"dataset = NetflixDataset(alpacaPrompts, tokenizer, max_length=max_length)\n",
"train_size = int(0.2 * len(dataset))\n",
"train_dataset, val_dataset = random_split(dataset, [train_size, len(dataset)-train_size])\n"
],
"metadata": {
"id": "4gdvR3IfiBK5"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"(dataset[0])"
],
"metadata": {
"id": "9iclO7Rrwu-E"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"training_args = TrainingArguments(output_dir='./results', num_train_epochs=5, logging_steps=500,\n",
" save_strategy=IntervalStrategy.NO,\n",
" per_device_train_batch_size=2, per_device_eval_batch_size=4,\n",
" warmup_steps=100, weight_decay=0.01, logging_dir='./logs')\n",
"\n",
"Trainer(model=model, args=training_args, train_dataset=train_dataset,\n",
" eval_dataset=val_dataset, data_collator=lambda data: {'input_ids': torch.stack([f[0] for f in data]),\n",
" 'attention_mask': torch.stack([f[1] for f in data]),\n",
" 'labels': torch.stack([f[0] for f in data])}).train()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 603
},
"id": "RKujM669iyYH",
"outputId": "98a1768e-65f5-4f08-fdac-e0d0e0b4df96"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='2450' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [2450/5000 1:16:05 < 1:19:15, 0.54 it/s, Epoch 2.45/5]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.333800</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.200800</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1500</td>\n",
" <td>0.122000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2000</td>\n",
" <td>0.117100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
]
},
"metadata": {}
},
{
"output_type": "error",
"ename": "KeyboardInterrupt",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-bf5bd8333aa5>\u001b[0m in \u001b[0;36m<cell line: 6>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m eval_dataset=val_dataset, data_collator=lambda data: {'input_ids': torch.stack([f[0] for f in data]),\n\u001b[1;32m 8\u001b[0m \u001b[0;34m'attention_mask'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mf\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m 'labels': torch.stack([f[0] for f in data])}).train()\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1524\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_inner_training_loop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train_batch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_find_batch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1525\u001b[0m )\n\u001b[0;32m-> 1526\u001b[0;31m return inner_training_loop(\n\u001b[0m\u001b[1;32m 1527\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1528\u001b[0m \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1794\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1795\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maccelerator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maccumulate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1796\u001b[0;31m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1797\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1798\u001b[0m if (\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtraining_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2650\u001b[0m \u001b[0mscaled_loss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2651\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2652\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maccelerator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2653\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2654\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgradient_accumulation_steps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/accelerate/accelerator.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, loss, **kwargs)\u001b[0m\n\u001b[1;32m 1851\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscale\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1852\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1853\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1854\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1855\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0munscale_gradients\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 485\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 486\u001b[0m )\n\u001b[0;32m--> 487\u001b[0;31m torch.autograd.backward(\n\u001b[0m\u001b[1;32m 488\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m )\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;31m# some Python versions print out the first line of a multi-line function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;31m# calls in the traceback and some print out the last line\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m allow_unreachable=True, accumulate_grad=True) # Calls into the C++ engine to run the backward pass\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
]
},
{
"cell_type": "code",
"source": [
"model"
],
"metadata": {
"id": "6uC4RZRmM-7x",
"outputId": "9309b17a-c821-499a-dc89-c2b13238b9d8",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"GPTNeoXForCausalLM(\n",
" (gpt_neox): GPTNeoXModel(\n",
" (embed_in): Embedding(50277, 1024)\n",
" (emb_dropout): Dropout(p=0.0, inplace=False)\n",
" (layers): ModuleList(\n",
" (0-23): 24 x GPTNeoXLayer(\n",
" (input_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" (post_attention_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" (post_attention_dropout): Dropout(p=0.0, inplace=False)\n",
" (post_mlp_dropout): Dropout(p=0.0, inplace=False)\n",
" (attention): GPTNeoXAttention(\n",
" (rotary_emb): GPTNeoXRotaryEmbedding()\n",
" (query_key_value): Linear(in_features=1024, out_features=3072, bias=True)\n",
" (dense): Linear(in_features=1024, out_features=1024, bias=True)\n",
" (attention_dropout): Dropout(p=0.0, inplace=False)\n",
" )\n",
" (mlp): GPTNeoXMLP(\n",
" (dense_h_to_4h): Linear(in_features=1024, out_features=4096, bias=True)\n",
" (dense_4h_to_h): Linear(in_features=4096, out_features=1024, bias=True)\n",
" (act): GELUActivation()\n",
" )\n",
" )\n",
" )\n",
" (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n",
" )\n",
" (embed_out): Linear(in_features=1024, out_features=50277, bias=False)\n",
")"
]
},
"metadata": {},
"execution_count": 9
}
]
},
{
"cell_type": "code",
"source": [
"model.generate??"
],
"metadata": {
"id": "2FIIqdcZpnpg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"tokenizer??"
],
"metadata": {
"id": "Udqua-r5qJ8H"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"model.generation_config"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3x52R4acwoPM",
"outputId": "bdb7742a-c517-4346-8d48-a8b22ded3b08"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"GenerationConfig {\n",
" \"_from_model_config\": true,\n",
" \"bos_token_id\": 50256,\n",
" \"eos_token_id\": 50256,\n",
" \"transformers_version\": \"4.32.0.dev0\"\n",
"}"
]
},
"metadata": {},
"execution_count": 31
}
]
},
{
"cell_type": "code",
"source": [
"## sample prompt input\n",
"alpacaInput = \"\"\"\n",
"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nMake a list of gift ideas.\\n\\n### Input:\\n\\n\\n### Response:\n",
"\"\"\""
],
"metadata": {
"id": "A9xRoXv98Ola"
},
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"source": [
"model.generate??"
],
"metadata": {
"id": "CAU3AUiX-irq"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Tokenize the prompt and move the input ids to the GPU.\n",
"generated = tokenizer(alpacaInput,return_tensors=\"pt\").input_ids.cuda()\n",
"generated_am = tokenizer(alpacaInput,return_tensors=\"pt\").attention_mask.cuda()\n",
"sample_outputs = model.generate(generated, do_sample=True, top_k=50,\n",
" # bos_token='<|startoftext|>',\n",
" # eos_token='<|endoftext|>', pad_token='<|pad|>',\n",
" max_length=1000, top_p=0.95, temperature=1, num_return_sequences=1)\n",
"\n",
"for i, sample_output in enumerate(sample_outputs):\n",
" print(\"{}: {}\".format(i, tokenizer.decode(sample_output, skip_special_tokens=True)))\n"
],
"metadata": {
"id": "5oYxTmoMi4Ei"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from huggingface_hub import notebook_login\n",
"\n",
"notebook_login()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 145,
"referenced_widgets": [
"ef7d4b96c3824dc48201a25992e73d6f",
"31e48c106de148c381865fd2b186d5a8",
"0a13fef39f1e4da0a890d6c6a70c6e66",
"55bb8db8a85142f28527316d8ab3560d",
"82bf72b8a2a044d189f99611e4aca0cb",
"e3c591122f0d49d3a5d96f71c9522ad3",
"b63fc334e33b47e9bcea71391a417d5f",
"b8d66adf36fb461ca81adadba204b701",
"106fe6ea19cc4f458b567a56a838aba6",
"07cfc6f9367a4e659a04c3898d3cfe3d",
"c6c2a1e6c21048248156c0797a0e207d",
"7f59c58d2efc44efb10e79d768d117a8",
"1bd3a7360caf4a1587a7357117dbb4e8",
"9e202b617aac4488a312ba3954aceb3e",
"18118d78e7f44c28ae16a67b9157a5cf",
"8d488464581e43e89bd1d55a9c0faa2d",
"0bd9c2f46c3b49c188dafa88011ce82e",
"500e6285eed3412ab158e6450b092e57",
"dbe68530bacb4bb9a096fbd4fd4206b0",
"357347403bcd4864b91cdcb8e0b50e8a",
"f4ccb465260d4cfda8407a4b95ef08f8",
"1e9bd33176a84029b25427187d97ad2e",
"86b5bc4a763a46878c7bcbd3a7619194",
"05776f363c5f48868c26ac747cc751ad",
"72cb74a140d041318dd9c6d7c0392fbd",
"698814859e544b6da8c0bd2b447d7f6e",
"8b2419167ca2417dacc03172e619d534",
"495a030ad8b8478da0406e8627dd3f29",
"bbab4eff8fad443f86c3bfc438242fe4",
"2dd521042b6c4cff96b08aa4d56e6c02",
"8f159e22b6094165ac5adefc4db835e1",
"8a5e551ea12a4a96bd84c6da5c458c31"
]
},
"id": "ycYHuUzoyY-D",
"outputId": "26e7260d-b6f4-4408-dcab-2f4326fb6abc"
},
"execution_count": 16,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "ef7d4b96c3824dc48201a25992e73d6f"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"model.push_to_hub(\"sandeshrajx/pythia-410m-alpaca-deduped-v0\", use_auth_token=True)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 86,
"referenced_widgets": [
"bef02a983ef74b8daa9e27c1d38c4b1d",
"8e063db56b2245eaaa9ade443b22e2c0",
"869c3e33ae3c4a85969608744ecdd72e",
"1ea68c1943724f4394d66e78bb4c0eb4",
"345a09601db24c4983724472cddcf4af",
"3c250f255efb488bbf8a596b55911034",
"aebb8687eeed479e899a4d10624f09d6",
"8c3a7593be364dfa813ca99e474c5965",
"6d98941b8a9344bb82a1e786c9418bce",
"48d6d1c14ef944a68c2462bd16bd2d63",
"3a0d5f4a484941248dcf56009413df89"
]
},
"id": "DJ3D5TVSyh3D",
"outputId": "df6bcea8-1b7d-4eac-c171-a22310429607"
},
"execution_count": 17,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"pytorch_model.bin: 0%| | 0.00/1.62G [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "bef02a983ef74b8daa9e27c1d38c4b1d"
}
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/sandeshrajx/pythia-410m-alpaca-deduped-v0/commit/070e8bcee1020ec6e835e9ca7747ce9303b39f9d', commit_message='Upload GPTNeoXForCausalLM', commit_description='', oid='070e8bcee1020ec6e835e9ca7747ce9303b39f9d', pr_url=None, pr_revision=None, pr_num=None)"
]
},
"metadata": {},
"execution_count": 17
}
]
}
]
}