Created
July 17, 2021 10:42
-
-
Save ritog/757c32d5b5065a91e63beb017e38dba9 to your computer and use it in GitHub Desktop.
Copy of robi-poems.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Copy of robi-poems.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"accelerator": "GPU", | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"15cfa01ee073450886a880886604c562": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_9caabaee8a0a499f9db335174e2295d5", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_62462f8646464a38aa5fceb53af0c72b", | |
"IPY_MODEL_724b8ea1b43647ba860325090ed99250" | |
] | |
} | |
}, | |
"9caabaee8a0a499f9db335174e2295d5": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"62462f8646464a38aa5fceb53af0c72b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_608a96da3b834b2dbee688614c041fbd", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 881, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 881, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_15031a8ce2c14dd8a1af5112abc4bc99" | |
} | |
}, | |
"724b8ea1b43647ba860325090ed99250": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_4557a319d0024a8f8d3d5e10137e8120", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 881/881 [00:08<00:00, 107B/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_25fe9839fa41409fa1b8be4f9caa1dc4" | |
} | |
}, | |
"608a96da3b834b2dbee688614c041fbd": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"15031a8ce2c14dd8a1af5112abc4bc99": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"4557a319d0024a8f8d3d5e10137e8120": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"25fe9839fa41409fa1b8be4f9caa1dc4": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"2c2664b316e843d98e97ed6ddd78ad1d": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_87b54d2be7e44a14b71880aa7f0977fa", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_0e26bf1b7ff64064858b55f64c13e5c9", | |
"IPY_MODEL_10067a00aa7f46e895a11ea42c1a0c83" | |
] | |
} | |
}, | |
"87b54d2be7e44a14b71880aa7f0977fa": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"0e26bf1b7ff64064858b55f64c13e5c9": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_5e51cc0230f9477ea229a9c8cf0a72c5", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 1708956, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 1708956, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_b7b5ac4bfa484e05bfb65ecd0fdcabec" | |
} | |
}, | |
"10067a00aa7f46e895a11ea42c1a0c83": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_90fe734421514632b7455707fdb6647a", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 1.71M/1.71M [00:06<00:00, 253kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_fef095a5d6e047dcb79bff65a0b80124" | |
} | |
}, | |
"5e51cc0230f9477ea229a9c8cf0a72c5": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"b7b5ac4bfa484e05bfb65ecd0fdcabec": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"90fe734421514632b7455707fdb6647a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"fef095a5d6e047dcb79bff65a0b80124": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"580bb65ee3824889b5631568b99a58c8": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_aac5891278e84c5291e5b0164044116e", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_66d20b10edfe4958a39368199fcaad3b", | |
"IPY_MODEL_306b0cfe44594bca8befbf05d6deff51" | |
] | |
} | |
}, | |
"aac5891278e84c5291e5b0164044116e": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"66d20b10edfe4958a39368199fcaad3b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_072fc33d415b4421a069677ad61ff011", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 510401385, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 510401385, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_9e3c7fe1fff44b41af0798b7fe192f0f" | |
} | |
}, | |
"306b0cfe44594bca8befbf05d6deff51": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_94f04430f25f49beb366ab3ca728ec3a", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 510M/510M [00:13<00:00, 36.7MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_6f4cca4a80274eadaf20d28acb074d6a" | |
} | |
}, | |
"072fc33d415b4421a069677ad61ff011": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"9e3c7fe1fff44b41af0798b7fe192f0f": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"94f04430f25f49beb366ab3ca728ec3a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"6f4cca4a80274eadaf20d28acb074d6a": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/ghosh-r/757c32d5b5065a91e63beb017e38dba9/copy-of-robi-poems.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "dV9hhxEKF48s" | |
}, | |
"source": [ | |
"%%capture\n", | |
"! pip install datasets transformers" | |
], | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MZ13avB4Ebnw" | |
}, | |
"source": [ | |
"# import re\n", | |
"import json\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"import pandas as pd" | |
], | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 132, | |
"referenced_widgets": [ | |
"15cfa01ee073450886a880886604c562", | |
"9caabaee8a0a499f9db335174e2295d5", | |
"62462f8646464a38aa5fceb53af0c72b", | |
"724b8ea1b43647ba860325090ed99250", | |
"608a96da3b834b2dbee688614c041fbd", | |
"15031a8ce2c14dd8a1af5112abc4bc99", | |
"4557a319d0024a8f8d3d5e10137e8120", | |
"25fe9839fa41409fa1b8be4f9caa1dc4", | |
"2c2664b316e843d98e97ed6ddd78ad1d", | |
"87b54d2be7e44a14b71880aa7f0977fa", | |
"0e26bf1b7ff64064858b55f64c13e5c9", | |
"10067a00aa7f46e895a11ea42c1a0c83", | |
"5e51cc0230f9477ea229a9c8cf0a72c5", | |
"b7b5ac4bfa484e05bfb65ecd0fdcabec", | |
"90fe734421514632b7455707fdb6647a", | |
"fef095a5d6e047dcb79bff65a0b80124" | |
] | |
}, | |
"id": "yGHBLbrGIWyV", | |
"outputId": "eb5296d4-3223-4dbc-de2c-be5bb8c70128" | |
}, | |
"source": [ | |
"from transformers import AutoTokenizer\n", | |
"\n", | |
"tokenizer = AutoTokenizer.from_pretrained(\"ghosh-r/bangla-gpt2\")\n", | |
"\n", | |
"train_path = 'train.txt'\n", | |
"test_path = 'test.txt'" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "15cfa01ee073450886a880886604c562", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=881.0, style=ProgressStyle(description_…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "2c2664b316e843d98e97ed6ddd78ad1d", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1708956.0, style=ProgressStyle(descript…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "1b4f-k8EIdXk", | |
"outputId": "f18dc59f-21cf-4268-8622-65f56430b036" | |
}, | |
"source": [ | |
"from transformers import TextDataset,DataCollatorForLanguageModeling\n", | |
"\n", | |
"def load_dataset(train_path,test_path,tokenizer):\n", | |
" train_dataset = TextDataset(\n", | |
" tokenizer=tokenizer,\n", | |
" file_path=train_path,\n", | |
" block_size=128)\n", | |
" \n", | |
" test_dataset = TextDataset(\n", | |
" tokenizer=tokenizer,\n", | |
" file_path=test_path,\n", | |
" block_size=128) \n", | |
" \n", | |
" data_collator = DataCollatorForLanguageModeling(\n", | |
" tokenizer=tokenizer, mlm=False,\n", | |
" )\n", | |
" return train_dataset,test_dataset,data_collator\n", | |
"\n", | |
"train_dataset,test_dataset,data_collator = load_dataset(train_path,test_path,tokenizer)" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/transformers/data/datasets/language_modeling.py:58: FutureWarning: This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/master/examples/pytorch/language-modeling/run_mlm.py\n", | |
" FutureWarning,\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 120, | |
"referenced_widgets": [ | |
"580bb65ee3824889b5631568b99a58c8", | |
"aac5891278e84c5291e5b0164044116e", | |
"66d20b10edfe4958a39368199fcaad3b", | |
"306b0cfe44594bca8befbf05d6deff51", | |
"072fc33d415b4421a069677ad61ff011", | |
"9e3c7fe1fff44b41af0798b7fe192f0f", | |
"94f04430f25f49beb366ab3ca728ec3a", | |
"6f4cca4a80274eadaf20d28acb074d6a" | |
] | |
}, | |
"id": "FuETPslLIo4U", | |
"outputId": "51042dd7-674a-4cbc-868c-e33dda336ea7" | |
}, | |
"source": [ | |
"from transformers import Trainer, TrainingArguments,AutoModelWithLMHead\n", | |
"\n", | |
"model = AutoModelWithLMHead.from_pretrained(\"ghosh-r/bangla-gpt2\")\n", | |
"\n", | |
"\n", | |
"training_args = TrainingArguments(\n", | |
" output_dir=\"./gpt2-robi_kobita\", #The output directory\n", | |
" overwrite_output_dir=True, #overwrite the content of the output directory\n", | |
" num_train_epochs=3, # number of training epochs\n", | |
" per_device_train_batch_size=32, # batch size for training\n", | |
" per_device_eval_batch_size=64, # batch size for evaluation\n", | |
" eval_steps = 400, # Number of update steps between two evaluations.\n", | |
" save_steps=800, # after # steps model is saved \n", | |
" warmup_steps=500,# number of warmup steps for learning rate scheduler\n", | |
" prediction_loss_only=True,\n", | |
" )\n", | |
"\n", | |
"\n", | |
"trainer = Trainer(\n", | |
" model=model,\n", | |
" args=training_args,\n", | |
" data_collator=data_collator,\n", | |
" train_dataset=train_dataset,\n", | |
" eval_dataset=test_dataset,\n", | |
")" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/transformers/models/auto/modeling_auto.py:847: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n", | |
" FutureWarning,\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "580bb65ee3824889b5631568b99a58c8", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=510401385.0, style=ProgressStyle(descri…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 347 | |
}, | |
"id": "j-t8_0pjZQiu", | |
"outputId": "9283797a-2d02-4b69-bc37-2e665deeaf12" | |
}, | |
"source": [ | |
"trainer.train()" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"***** Running training *****\n", | |
" Num examples = 6888\n", | |
" Num Epochs = 3\n", | |
" Instantaneous batch size per device = 32\n", | |
" Total train batch size (w. parallel, distributed & accumulation) = 32\n", | |
" Gradient Accumulation steps = 1\n", | |
" Total optimization steps = 648\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" \n", | |
" <progress value='648' max='648' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" [648/648 06:08, Epoch 3/3]\n", | |
" </div>\n", | |
" <table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: left;\">\n", | |
" <th>Step</th>\n", | |
" <th>Training Loss</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>500</td>\n", | |
" <td>2.350000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table><p>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"\n", | |
"Training completed. Do not forget to share your model on huggingface.co/models =)\n", | |
"\n", | |
"\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"TrainOutput(global_step=648, training_loss=2.242282631956501, metrics={'train_runtime': 369.4134, 'train_samples_per_second': 55.937, 'train_steps_per_second': 1.754, 'total_flos': 1974853779849216.0, 'train_loss': 2.242282631956501, 'epoch': 3.0})" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "jBRFaebLd9NA", | |
"outputId": "adf8f2f6-1fb5-4966-cb5d-413f363d1884" | |
}, | |
"source": [ | |
"trainer.save_model()" | |
], | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Saving model checkpoint to ./gpt2-robi_kobita\n", | |
"Configuration saved in ./gpt2-robi_kobita/config.json\n", | |
"Model weights saved in ./gpt2-robi_kobita/pytorch_model.bin\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "PBW6W2D-d0HJ", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "a817202f-34e6-4e4d-ec17-bde31dedae4d" | |
}, | |
"source": [ | |
"from transformers import pipeline\n", | |
"\n", | |
"poet = pipeline('text-generation',model='./gpt2-robi_kobita', tokenizer='ghosh-r/bangla-gpt2')" | |
], | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"loading configuration file ./gpt2-robi_kobita/config.json\n", | |
"Model config GPT2Config {\n", | |
" \"_name_or_path\": \"ghosh-r/bangla-gpt2\",\n", | |
" \"activation_function\": \"gelu_new\",\n", | |
" \"architectures\": [\n", | |
" \"GPT2LMHeadModel\"\n", | |
" ],\n", | |
" \"attn_pdrop\": 0.0,\n", | |
" \"bos_token_id\": 50256,\n", | |
" \"embd_pdrop\": 0.0,\n", | |
" \"eos_token_id\": 50256,\n", | |
" \"gradient_checkpointing\": false,\n", | |
" \"initializer_range\": 0.02,\n", | |
" \"layer_norm_epsilon\": 1e-05,\n", | |
" \"model_type\": \"gpt2\",\n", | |
" \"n_ctx\": 1024,\n", | |
" \"n_embd\": 768,\n", | |
" \"n_head\": 12,\n", | |
" \"n_inner\": null,\n", | |
" \"n_layer\": 12,\n", | |
" \"n_positions\": 1024,\n", | |
" \"resid_pdrop\": 0.0,\n", | |
" \"scale_attn_weights\": true,\n", | |
" \"summary_activation\": null,\n", | |
" \"summary_first_dropout\": 0.1,\n", | |
" \"summary_proj_to_labels\": true,\n", | |
" \"summary_type\": \"cls_index\",\n", | |
" \"summary_use_proj\": true,\n", | |
" \"task_specific_params\": {\n", | |
" \"text-generation\": {\n", | |
" \"do_sample\": true,\n", | |
" \"max_length\": 50\n", | |
" }\n", | |
" },\n", | |
" \"torch_dtype\": \"float32\",\n", | |
" \"transformers_version\": \"4.8.2\",\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 50257\n", | |
"}\n", | |
"\n", | |
"loading configuration file ./gpt2-robi_kobita/config.json\n", | |
"Model config GPT2Config {\n", | |
" \"_name_or_path\": \"ghosh-r/bangla-gpt2\",\n", | |
" \"activation_function\": \"gelu_new\",\n", | |
" \"architectures\": [\n", | |
" \"GPT2LMHeadModel\"\n", | |
" ],\n", | |
" \"attn_pdrop\": 0.0,\n", | |
" \"bos_token_id\": 50256,\n", | |
" \"embd_pdrop\": 0.0,\n", | |
" \"eos_token_id\": 50256,\n", | |
" \"gradient_checkpointing\": false,\n", | |
" \"initializer_range\": 0.02,\n", | |
" \"layer_norm_epsilon\": 1e-05,\n", | |
" \"model_type\": \"gpt2\",\n", | |
" \"n_ctx\": 1024,\n", | |
" \"n_embd\": 768,\n", | |
" \"n_head\": 12,\n", | |
" \"n_inner\": null,\n", | |
" \"n_layer\": 12,\n", | |
" \"n_positions\": 1024,\n", | |
" \"resid_pdrop\": 0.0,\n", | |
" \"scale_attn_weights\": true,\n", | |
" \"summary_activation\": null,\n", | |
" \"summary_first_dropout\": 0.1,\n", | |
" \"summary_proj_to_labels\": true,\n", | |
" \"summary_type\": \"cls_index\",\n", | |
" \"summary_use_proj\": true,\n", | |
" \"task_specific_params\": {\n", | |
" \"text-generation\": {\n", | |
" \"do_sample\": true,\n", | |
" \"max_length\": 50\n", | |
" }\n", | |
" },\n", | |
" \"torch_dtype\": \"float32\",\n", | |
" \"transformers_version\": \"4.8.2\",\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 50257\n", | |
"}\n", | |
"\n", | |
"loading weights file ./gpt2-robi_kobita/pytorch_model.bin\n", | |
"All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", | |
"\n", | |
"All the weights of GPT2LMHeadModel were initialized from the model checkpoint at ./gpt2-robi_kobita.\n", | |
"If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", | |
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n", | |
"loading configuration file https://huggingface.co/ghosh-r/bangla-gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/d916cda1fc7c33b497e404011afd7185469a7d6daf8d674e4bcbdb951e059768.0d65f2cde7eca11af939c04f8bd685073e094591ded3eb632f51dc0e29899bde\n", | |
"Model config GPT2Config {\n", | |
" \"_name_or_path\": \"ghosh-r/bangla-gpt2\",\n", | |
" \"activation_function\": \"gelu_new\",\n", | |
" \"architectures\": [\n", | |
" \"GPT2LMHeadModel\"\n", | |
" ],\n", | |
" \"attn_pdrop\": 0.0,\n", | |
" \"bos_token_id\": 50256,\n", | |
" \"embd_pdrop\": 0.0,\n", | |
" \"eos_token_id\": 50256,\n", | |
" \"gradient_checkpointing\": false,\n", | |
" \"initializer_range\": 0.02,\n", | |
" \"layer_norm_epsilon\": 1e-05,\n", | |
" \"model_type\": \"gpt2\",\n", | |
" \"n_ctx\": 1024,\n", | |
" \"n_embd\": 768,\n", | |
" \"n_head\": 12,\n", | |
" \"n_inner\": null,\n", | |
" \"n_layer\": 12,\n", | |
" \"n_positions\": 1024,\n", | |
" \"resid_pdrop\": 0.0,\n", | |
" \"scale_attn_weights\": true,\n", | |
" \"summary_activation\": null,\n", | |
" \"summary_first_dropout\": 0.1,\n", | |
" \"summary_proj_to_labels\": true,\n", | |
" \"summary_type\": \"cls_index\",\n", | |
" \"summary_use_proj\": true,\n", | |
" \"task_specific_params\": {\n", | |
" \"text-generation\": {\n", | |
" \"do_sample\": true,\n", | |
" \"max_length\": 50\n", | |
" }\n", | |
" },\n", | |
" \"torch_dtype\": \"float32\",\n", | |
" \"transformers_version\": \"4.8.2\",\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 50257\n", | |
"}\n", | |
"\n", | |
"loading file https://huggingface.co/ghosh-r/bangla-gpt2/resolve/main/vocab.json from cache at None\n", | |
"loading file https://huggingface.co/ghosh-r/bangla-gpt2/resolve/main/merges.txt from cache at None\n", | |
"loading file https://huggingface.co/ghosh-r/bangla-gpt2/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/8b3d4a6f9e80e0e5494bf8f1754de822dcdc06314d0994c9b68f7dbc1735cffa.9dfff3c35c0a768eb26244f921a71293e1adaa890f1468c982bad5568d6cd623\n", | |
"loading file https://huggingface.co/ghosh-r/bangla-gpt2/resolve/main/added_tokens.json from cache at None\n", | |
"loading file https://huggingface.co/ghosh-r/bangla-gpt2/resolve/main/special_tokens_map.json from cache at None\n", | |
"loading file https://huggingface.co/ghosh-r/bangla-gpt2/resolve/main/tokenizer_config.json from cache at None\n", | |
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "SPzHEeeUeHJZ", | |
"outputId": "aba9c81d-3f0e-4882-8ec8-ae95a2ce8c99" | |
}, | |
"source": [ | |
"poem = poet('আমি তোমাকে দেখেছি মোর হৃদয়ে', max_length=200)" | |
], | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 69 | |
}, | |
"id": "oIOlTCTdxyKK", | |
"outputId": "c9742aca-b5d4-4b6f-bac1-869183ae4e5a" | |
}, | |
"source": [ | |
"poem[0]['generated_text']" | |
], | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
}, | |
"text/plain": [ | |
"'আমি তোমাকে দেখেছি মোর হৃদয়ের কুকুরের মতো।\\n\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0 মুহূর্তে সব নেভায়ে,\\n\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0 চিরদিন থাকে কেন আমার ঘরে?\\n\\xa0\\xa0\\xa0জীবন আমার হৃদয়ের পালঙ্কের একেবারে কাছে,\\n\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0আমার হৃদয়-সমুদ্র পারে।\\n\\xa0\\xa0\\xa0মনে হত আমার কত কত সংসারে\\n\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0\\xa0কত রাত্\\u200cরির কত বিহঙ্গ যেন হয়।\\n\\xa0\\xa0মনে হত পৃথিবীর কত মা যেন'" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 19 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "GMvtF1Vg2oBk" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment