Created
August 8, 2020 16:55
-
-
Save chetanambi/bfb580b00422b3184e758596d2f67759 to your computer and use it in GitHub Desktop.
NLP_with_Disaster_Tweets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "NLP with Disaster Tweets.ipynb", | |
"provenance": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"accelerator": "GPU", | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"b455d75f7a104923b1bf6418de35860b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_7c3428e7e9644b72a6313330621bb1af", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_d12d68cf6a7f4c97a55f9c983b7bd574", | |
"IPY_MODEL_3d58054c927a4c3cb52fd97080bbab12" | |
] | |
} | |
}, | |
"7c3428e7e9644b72a6313330621bb1af": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"d12d68cf6a7f4c97a55f9c983b7bd574": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_0986f24d906749e69106d5f2283567ee", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 481, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 481, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_88f72c3b962e45f3a3024c2d0b90f56b" | |
} | |
}, | |
"3d58054c927a4c3cb52fd97080bbab12": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_f874e0e8067d4762bf28fabe2d556d0f", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 481/481 [00:00<00:00, 1.55kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_03dba9bb65c24669b7972e5225132dc4" | |
} | |
}, | |
"0986f24d906749e69106d5f2283567ee": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"88f72c3b962e45f3a3024c2d0b90f56b": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"f874e0e8067d4762bf28fabe2d556d0f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"03dba9bb65c24669b7972e5225132dc4": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"edeb7d88a6b34ccc9fab83ec1e7b485a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_d63eb26f60e9444a8af68b104bf16162", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_39bc166e56604240bc8606854e82d5d7", | |
"IPY_MODEL_54c0d85ba4db49e79f4119f69db3986e" | |
] | |
} | |
}, | |
"d63eb26f60e9444a8af68b104bf16162": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"39bc166e56604240bc8606854e82d5d7": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_e4e6bf931212411b9f377e6b745b6684", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 501200538, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 501200538, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_e13361f355884900ba31a96a5275952e" | |
} | |
}, | |
"54c0d85ba4db49e79f4119f69db3986e": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_de23c801a15d4b9e9387920168832916", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 501M/501M [00:19<00:00, 25.5MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_c529d3191c74443e8bafea48ac6c0578" | |
} | |
}, | |
"e4e6bf931212411b9f377e6b745b6684": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"e13361f355884900ba31a96a5275952e": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"de23c801a15d4b9e9387920168832916": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"c529d3191c74443e8bafea48ac6c0578": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"a189510e95d54ab8b731e4dbe4c251bb": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_ac22d5f851d946719bf131c05b28281b", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_d9ad0eea6c68467b952925e9eda85446", | |
"IPY_MODEL_f4805482ed8f4fa0b091473b92029a76" | |
] | |
} | |
}, | |
"ac22d5f851d946719bf131c05b28281b": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"d9ad0eea6c68467b952925e9eda85446": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_efadb3c4c30c4ca2a8fcf407cf5c9058", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 898823, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 898823, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_5e858916b1cf4ca7a341110170f02b2b" | |
} | |
}, | |
"f4805482ed8f4fa0b091473b92029a76": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_a5dafe4ae9b94ca19e7acba889cf6ef7", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 899k/899k [00:01<00:00, 563kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_a7f38f11bcb749f09e0be41f02bb7298" | |
} | |
}, | |
"efadb3c4c30c4ca2a8fcf407cf5c9058": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"5e858916b1cf4ca7a341110170f02b2b": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"a5dafe4ae9b94ca19e7acba889cf6ef7": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"a7f38f11bcb749f09e0be41f02bb7298": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"d83a56f2745e427c906227391c2c41f0": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_c73d993bafca430cb37a9e799e9934aa", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_f69264e66fd14f71ab1ed0885b502f8c", | |
"IPY_MODEL_3e6fd1d18b284bbfb7b89aefa2325e66" | |
] | |
} | |
}, | |
"c73d993bafca430cb37a9e799e9934aa": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"f69264e66fd14f71ab1ed0885b502f8c": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_791efcd9e33841238e644e10929f45a9", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 456318, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 456318, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_e2ca11d7be5a4a0bbc21fe36c760ccfc" | |
} | |
}, | |
"3e6fd1d18b284bbfb7b89aefa2325e66": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_5967caa8f12245b9b82ad2b4b7c82748", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 456k/456k [00:00<00:00, 1.08MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_5b025b236224462ebc72d5dbe29999a3" | |
} | |
}, | |
"791efcd9e33841238e644e10929f45a9": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"e2ca11d7be5a4a0bbc21fe36c760ccfc": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"5967caa8f12245b9b82ad2b4b7c82748": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"5b025b236224462ebc72d5dbe29999a3": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "12Gx5UyPvWzH", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"# Real or Not? NLP with Disaster Tweets with Simple Transformers" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "FMPERLKFvi87", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Import libraries and datasets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "X_gJ7ANDM87h", | |
"colab_type": "code", | |
"colab": { | |
"resources": { | |
"http://localhost:8080/nbextensions/google.colab/files.js": { | |
"data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7CgpmdW5jdGlvbiBfdXBsb2FkRmlsZXMoaW5wdXRJZCwgb3V0cHV0SWQpIHsKICBjb25zdCBzdGVwcyA9IHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCk7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICAvLyBDYWNoZSBzdGVwcyBvbiB0aGUgb3V0cHV0RWxlbWVudCB0byBtYWtlIGl0IGF2YWlsYWJsZSBmb3IgdGhlIG5leHQgY2FsbAogIC8vIHRvIHVwbG9hZEZpbGVzQ29udGludWUgZnJvbSBQeXRob24uCiAgb3V0cHV0RWxlbWVudC5zdGVwcyA9IHN0ZXBzOwoKICByZXR1cm4gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpOwp9CgovLyBUaGlzIGlzIHJvdWdobHkgYW4gYXN5bmMgZ2VuZXJhdG9yIChub3Qgc3VwcG9ydGVkIGluIHRoZSBicm93c2VyIHlldCksCi8vIHdoZXJlIHRoZXJlIGFyZSBtdWx0aXBsZSBhc3luY2hyb25vdXMgc3RlcHMgYW5kIHRoZSBQeXRob24gc2lkZSBpcyBnb2luZwovLyB0byBwb2xsIGZvciBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcC4KLy8gVGhpcyB1c2VzIGEgUHJvbWlzZSB0byBibG9jayB0aGUgcHl0aG9uIHNpZGUgb24gY29tcGxldGlvbiBvZiBlYWNoIHN0ZXAsCi8vIHRoZW4gcGFzc2VzIHRoZSByZXN1bHQgb2YgdGhlIHByZXZpb3VzIHN0ZXAgYXMgdGhlIGlucHV0IHRvIHRoZSBuZXh0IHN0ZXAuCmZ1bmN0aW9uIF91cGxvYWRGaWxlc0NvbnRpbnVlKG91dHB1dElkKSB7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICBjb25zdCBzdGVwcyA9IG91dHB1dEVsZW1lbnQuc3RlcHM7CgogIGNvbnN0IG5leHQgPSBzdGVwcy5uZXh0KG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSk7CiAgcmV0dXJuIFByb21pc2UucmVzb2x2ZShuZXh0LnZhbHVlLnByb21pc2UpLnRoZW4oKHZhbHVlKSA9PiB7CiAgICAvLyBDYWNoZSB0aGUgbGFzdCBwcm9taXNlIHZhbHVlIHRvIG1ha2UgaXQgYXZhaWxhYmxlIHRvIHRoZSBuZXh0CiAgICAvLyBzdGVwIG9mIHRoZSBnZW5lcmF0b3IuCiAgICBvdXRwdXRFbGVtZW50Lmxhc3RQcm9taXNlVmFsdWUgPSB2YWx1ZTsKICAgIHJldHVybiBuZXh0LnZhbHVlLnJlc3BvbnNlOwogIH0pOwp9CgovKioKICogR2VuZXJhdG9yIGZ1bmN0aW9uIHdoaWNoIGlzIGNhbGxlZCBiZXR3ZWVuIGVhY2ggYXN5bmMgc3RlcCBvZiB0aGUgdXBsb2FkCiAqIHByb2Nlc3MuCiAqIEBwYXJhbSB7c3RyaW5nfSBpbnB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIGlucHV0IGZpbGUgcGlja2VyIGVsZW1lbnQuCiAqIEBwYXJhbSB7c3RyaW5nfSBvdXRwdXRJZCBFbGVtZW50IElEIG9mIHRoZSBvdXRwdXQgZGlzcGxheS4KICogQHJldHVybiB7IUl0ZXJhYmxlPCFPYmplY3Q+fSBJdGVyYWJsZSBvZiBuZXh0IHN0ZXBzLgogKi8KZnVuY3Rpb24qIHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IGlucHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKGlucHV0SWQpOwogIGlucHV0RWxlbWVudC5kaXNhYmxlZCA9IGZhbHNlOwoKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIG91dHB1dEVsZW1lbnQuaW5uZXJIVE1MID0gJyc7CgogIGNvbnN0IHBpY2tlZFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgaW5wdXRFbGVtZW50LmFkZEV2ZW50TGlzdGVuZXIoJ2NoYW5nZScsIChlKSA9PiB7CiAgICAgIHJlc29sdmUoZS50YXJnZXQuZmlsZXMpOwogICAgfSk7CiAgfSk7CgogIGNvbnN0IGNhbmNlbCA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2J1dHRvbicpOwogIGlucHV0RWxlbWVudC5wYXJlbnRFbGVtZW50LmFwcGVuZENoaWxkKGNhbmNlbCk7CiAgY2FuY2VsLnRleHRDb250ZW50ID0gJ0NhbmNlbCB1cGxvYWQnOwogIGNvbnN0IGNhbmNlbFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgY2FuY2VsLm9uY2xpY2sgPSAoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9OwogIH0pOwoKICAvLyBXYWl0IGZvciB0aGUgdXNlciB0byBwaWNrIHRoZSBmaWxlcy4KICBjb25zdCBmaWxlcyA9IHlpZWxkIHsKICAgIHByb21pc2U6IFByb21pc2UucmFjZShbcGlja2VkUHJvbWlzZSwgY2FuY2VsUHJvbWlzZV0pLAogICAgcmVzcG9uc2U6IHsKICAgICAgYWN0aW9uOiAnc3RhcnRpbmcnLAogICAgfQogIH07CgogIGNhbmNlbC5yZW1vdmUoKTsKCiAgLy8gRGlzYWJsZSB0aGUgaW5wdXQgZWxlbWVudCBzaW5jZSBmdXJ0aGVyIHBpY2tzIGFyZSBub3QgYWxsb3dlZC4KICBpbnB1dEVsZW1lbnQuZGlzYWJsZWQgPSB0cnVlOwoKICBpZiAoIWZpbGVzKSB7CiAgICByZXR1cm4gewogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgICAgfQogICAgfTsKICB9CgogIGZvciAoY29uc3QgZmlsZSBvZiBmaWxlcykgewogICAgY29uc3QgbGkgPSBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdsaScpOwogICAgbGkuYXBwZW5kKHNwYW4oZmlsZS5uYW1lLCB7Zm9udFdlaWdodDogJ2JvbGQnfSkpOwogICAgbGkuYXBwZW5kKHNwYW4oCiAgICAgICAgYCgke2ZpbGUudHlwZSB8fCAnbi9hJ30pIC0gJHtmaWxlLnNpemV9IGJ5dGVzLCBgICsKICAgICAgICBgbGFzdCBtb2RpZmllZDogJHsKICAgICAgICAgICAgZmlsZS5sYXN0TW9kaWZpZWREYXRlID8gZmlsZS5sYXN0TW9kaWZpZWREYXRlLnRvTG9jYWxlRGF0ZVN0cmluZygpIDoKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ24vYSd9IC0gYCkpOwogICAgY29uc3QgcGVyY2VudCA9IHNwYW4oJzAlIGRvbmUnKTsKICAgIGxpLmFwcGVuZENoaWxkKHBlcmNlbnQpOwoKICAgIG91dHB1dEVsZW1lbnQuYXBwZW5kQ2hpbGQobGkpOwoKICAgIGNvbnN0IGZpbGVEYXRhUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICAgIGNvbnN0IHJlYWRlciA9IG5ldyBGaWxlUmVhZGVyKCk7CiAgICAgIHJlYWRlci5vbmxvYWQgPSAoZSkgPT4gewogICAgICAgIHJlc29sdmUoZS50YXJnZXQucmVzdWx0KTsKICAgICAgfTsKICAgICAgcmVhZGVyLnJlYWRBc0FycmF5QnVmZmVyKGZpbGUpOwogICAgfSk7CiAgICAvLyBXYWl0IGZvciB0aGUgZGF0YSB0byBiZSByZWFkeS4KICAgIGxldCBmaWxlRGF0YSA9IHlpZWxkIHsKICAgICAgcHJvbWlzZTogZmlsZURhdGFQcm9taXNlLAogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbnRpbnVlJywKICAgICAgfQogICAgfTsKCiAgICAvLyBVc2UgYSBjaHVua2VkIHNlbmRpbmcgdG8gYXZvaWQgbWVzc2FnZSBzaXplIGxpbWl0cy4gU2VlIGIvNjIxMTU2NjAuCiAgICBsZXQgcG9zaXRpb24gPSAwOwogICAgd2hpbGUgKHBvc2l0aW9uIDwgZmlsZURhdGEuYnl0ZUxlbmd0aCkgewogICAgICBjb25zdCBsZW5ndGggPSBNYXRoLm1pbihmaWxlRGF0YS5ieXRlTGVuZ3RoIC0gcG9zaXRpb24sIE1BWF9QQVlMT0FEX1NJWkUpOwogICAgICBjb25zdCBjaHVuayA9IG5ldyBVaW50OEFycmF5KGZpbGVEYXRhLCBwb3NpdGlvbiwgbGVuZ3RoKTsKICAgICAgcG9zaXRpb24gKz0gbGVuZ3RoOwoKICAgICAgY29uc3QgYmFzZTY0ID0gYnRvYShTdHJpbmcuZnJvbUNoYXJDb2RlLmFwcGx5KG51bGwsIGNodW5rKSk7CiAgICAgIHlpZWxkIHsKICAgICAgICByZXNwb25zZTogewogICAgICAgICAgYWN0aW9uOiAnYXBwZW5kJywKICAgICAgICAgIGZpbGU6IGZpbGUubmFtZSwKICAgICAgICAgIGRhdGE6IGJhc2U2NCwKICAgICAgICB9LAogICAgICB9OwogICAgICBwZXJjZW50LnRleHRDb250ZW50ID0KICAgICAgICAgIGAke01hdGgucm91bmQoKHBvc2l0aW9uIC8gZmlsZURhdGEuYnl0ZUxlbmd0aCkgKiAxMDApfSUgZG9uZWA7CiAgICB9CiAgfQoKICAvLyBBbGwgZG9uZS4KICB5aWVsZCB7CiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICB9CiAgfTsKfQoKc2NvcGUuZ29vZ2xlID0gc2NvcGUuZ29vZ2xlIHx8IHt9OwpzY29wZS5nb29nbGUuY29sYWIgPSBzY29wZS5nb29nbGUuY29sYWIgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYi5fZmlsZXMgPSB7CiAgX3VwbG9hZEZpbGVzLAogIF91cGxvYWRGaWxlc0NvbnRpbnVlLAp9Owp9KShzZWxmKTsK", | |
"ok": true, | |
"headers": [ | |
[ | |
"content-type", | |
"application/javascript" | |
] | |
], | |
"status": 200, | |
"status_text": "" | |
} | |
}, | |
"base_uri": "https://localhost:8080/", | |
"height": 73 | |
}, | |
"outputId": "cbac8f8e-c200-4332-c8d8-799f3def14fb" | |
}, | |
"source": [ | |
"from google.colab import files\n", | |
"files.upload()\n", | |
"\n", | |
"!pip install -q kaggle\n", | |
"!mkdir ~/.kaggle\n", | |
"!cp kaggle.json ~/.kaggle/\n", | |
"!chmod 600 ~/.kaggle/kaggle.json" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"\n", | |
" <input type=\"file\" id=\"files-6d56757a-0c5d-470f-abb0-a23f34f19ce3\" name=\"files[]\" multiple disabled\n", | |
" style=\"border:none\" />\n", | |
" <output id=\"result-6d56757a-0c5d-470f-abb0-a23f34f19ce3\">\n", | |
" Upload widget is only available when the cell has been executed in the\n", | |
" current browser session. Please rerun this cell to enable.\n", | |
" </output>\n", | |
" <script src=\"/nbextensions/google.colab/files.js\"></script> " | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Saving kaggle.json to kaggle.json\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "FCXx44zZL81K", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 185 | |
}, | |
"outputId": "612036d2-8981-44c5-cca8-29f6a66977c5" | |
}, | |
"source": [ | |
"!kaggle competitions download -c nlp-getting-started" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Warning: Looks like you're using an outdated API Version, please consider updating (server 1.5.6 / client 1.5.4)\n", | |
"Downloading test.csv to /content\n", | |
" 0% 0.00/411k [00:00<?, ?B/s]\n", | |
"100% 411k/411k [00:00<00:00, 61.7MB/s]\n", | |
"Downloading train.csv to /content\n", | |
" 0% 0.00/965k [00:00<?, ?B/s]\n", | |
"100% 965k/965k [00:00<00:00, 64.6MB/s]\n", | |
"Downloading sample_submission.csv to /content\n", | |
" 0% 0.00/22.2k [00:00<?, ?B/s]\n", | |
"100% 22.2k/22.2k [00:00<00:00, 23.1MB/s]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "nK4y3ZqQOdMw", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 1000 | |
}, | |
"outputId": "6b83e346-36d1-4a60-95ff-968ca3b05f9e" | |
}, | |
"source": [ | |
"!pip install --upgrade transformers\n", | |
"!pip install simpletransformers" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Collecting transformers\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/27/3c/91ed8f5c4e7ef3227b4119200fc0ed4b4fd965b1f0172021c25701087825/transformers-3.0.2-py3-none-any.whl (769kB)\n", | |
"\r\u001b[K |▍ | 10kB 21.8MB/s eta 0:00:01\r\u001b[K |▉ | 20kB 4.5MB/s eta 0:00:01\r\u001b[K |█▎ | 30kB 5.7MB/s eta 0:00:01\r\u001b[K |█▊ | 40kB 6.0MB/s eta 0:00:01\r\u001b[K |██▏ | 51kB 4.8MB/s eta 0:00:01\r\u001b[K |██▋ | 61kB 5.4MB/s eta 0:00:01\r\u001b[K |███ | 71kB 5.9MB/s eta 0:00:01\r\u001b[K |███▍ | 81kB 6.5MB/s eta 0:00:01\r\u001b[K |███▉ | 92kB 6.9MB/s eta 0:00:01\r\u001b[K |████▎ | 102kB 6.7MB/s eta 0:00:01\r\u001b[K |████▊ | 112kB 6.7MB/s eta 0:00:01\r\u001b[K |█████▏ | 122kB 6.7MB/s eta 0:00:01\r\u001b[K |█████▌ | 133kB 6.7MB/s eta 0:00:01\r\u001b[K |██████ | 143kB 6.7MB/s eta 0:00:01\r\u001b[K |██████▍ | 153kB 6.7MB/s eta 0:00:01\r\u001b[K |██████▉ | 163kB 6.7MB/s eta 0:00:01\r\u001b[K |███████▎ | 174kB 6.7MB/s eta 0:00:01\r\u001b[K |███████▊ | 184kB 6.7MB/s eta 0:00:01\r\u001b[K |████████ | 194kB 6.7MB/s eta 0:00:01\r\u001b[K |████████▌ | 204kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████ | 215kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████▍ | 225kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████▉ | 235kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████▎ | 245kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████▋ | 256kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████ | 266kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████▌ | 276kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████ | 286kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████▍ | 296kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████▉ | 307kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████▏ | 317kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████▋ | 327kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████ | 337kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████▌ | 348kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████ | 358kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████▍ | 368kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████▊ | 378kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████▏ | 389kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████▋ | 399kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████ | 409kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████▌ | 419kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████ | 430kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████▎ | 440kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████▊ | 450kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████▏ | 460kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████▋ | 471kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████ | 481kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████▌ | 491kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████▉ | 501kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████▎ | 512kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████▊ | 522kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████▏ | 532kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████▋ | 542kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████ | 552kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████▍ | 563kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████▉ | 573kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████▎ | 583kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████▊ | 593kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████▏ | 604kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████▋ | 614kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████ | 624kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████▍ | 634kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████▉ | 645kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████████▎ | 655kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████████▊ | 665kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████████▏ | 675kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████████▌ | 686kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████████ | 696kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▍ | 706kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▉ | 716kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▎ | 727kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▊ | 737kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████████████ | 747kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▌| 757kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 768kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 778kB 6.7MB/s \n", | |
"\u001b[?25hCollecting sentencepiece!=0.1.92\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)\n", | |
"\u001b[K |████████████████████████████████| 1.1MB 38.4MB/s \n", | |
"\u001b[?25hCollecting tokenizers==0.8.1.rc1\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/40/d0/30d5f8d221a0ed981a186c8eb986ce1c94e3a6e87f994eae9f4aa5250217/tokenizers-0.8.1rc1-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)\n", | |
"\u001b[K |████████████████████████████████| 3.0MB 36.8MB/s \n", | |
"\u001b[?25hRequirement already satisfied, skipping upgrade: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n", | |
"Requirement already satisfied, skipping upgrade: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5)\n", | |
"Requirement already satisfied, skipping upgrade: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\n", | |
"Requirement already satisfied, skipping upgrade: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n", | |
"Requirement already satisfied, skipping upgrade: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n", | |
"Requirement already satisfied, skipping upgrade: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4)\n", | |
"Collecting sacremoses\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n", | |
"\u001b[K |████████████████████████████████| 890kB 41.8MB/s \n", | |
"\u001b[?25hRequirement already satisfied, skipping upgrade: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n", | |
"Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n", | |
"Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)\n", | |
"Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\n", | |
"Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.6.20)\n", | |
"Requirement already satisfied, skipping upgrade: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)\n", | |
"Requirement already satisfied, skipping upgrade: six in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (1.15.0)\n", | |
"Requirement already satisfied, skipping upgrade: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n", | |
"Requirement already satisfied, skipping upgrade: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.16.0)\n", | |
"Building wheels for collected packages: sacremoses\n", | |
" Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893260 sha256=248a9276b72618d8e021779f36b78499151e0b8cda4d7a5857b8e05995317b2c\n", | |
" Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n", | |
"Successfully built sacremoses\n", | |
"Installing collected packages: sentencepiece, tokenizers, sacremoses, transformers\n", | |
"Successfully installed sacremoses-0.0.43 sentencepiece-0.1.91 tokenizers-0.8.1rc1 transformers-3.0.2\n", | |
"Collecting simpletransformers\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/3b/36/884727c20a4777105705cd6d01d57abfa7274d63a7aebb6d23d46b589d2d/simpletransformers-0.46.6-py3-none-any.whl (199kB)\n", | |
"\u001b[K |████████████████████████████████| 204kB 7.6MB/s \n", | |
"\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (0.22.2.post1)\n", | |
"Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (1.0.5)\n", | |
"Collecting seqeval\n", | |
" Downloading https://files.pythonhosted.org/packages/34/91/068aca8d60ce56dd9ba4506850e876aba5e66a6f2f29aa223224b50df0de/seqeval-0.0.12.tar.gz\n", | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (1.18.5)\n", | |
"Collecting tensorboardx\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/af/0c/4f41bcd45db376e6fe5c619c01100e9b7531c55791b7244815bac6eac32c/tensorboardX-2.1-py2.py3-none-any.whl (308kB)\n", | |
"\u001b[K |████████████████████████████████| 317kB 14.7MB/s \n", | |
"\u001b[?25hCollecting wandb\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/94/19/f8db9eff4b0173adf6dd2e8b0c3d8de0bfe10ec9ed63d247665980d82258/wandb-0.9.4-py2.py3-none-any.whl (1.4MB)\n", | |
"\u001b[K |████████████████████████████████| 1.4MB 18.5MB/s \n", | |
"\u001b[?25hRequirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (1.4.1)\n", | |
"Collecting tqdm>=4.47.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/28/7e/281edb5bc3274dfb894d90f4dbacfceaca381c2435ec6187a2c6f329aed7/tqdm-4.48.2-py2.py3-none-any.whl (68kB)\n", | |
"\u001b[K |████████████████████████████████| 71kB 6.4MB/s \n", | |
"\u001b[?25hRequirement already satisfied: tokenizers in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (0.8.1rc1)\n", | |
"Requirement already satisfied: regex in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (2019.12.20)\n", | |
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (2.23.0)\n", | |
"Requirement already satisfied: transformers>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (3.0.2)\n", | |
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->simpletransformers) (0.16.0)\n", | |
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->simpletransformers) (2018.9)\n", | |
"Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas->simpletransformers) (2.8.1)\n", | |
"Requirement already satisfied: Keras>=2.2.4 in /usr/local/lib/python3.6/dist-packages (from seqeval->simpletransformers) (2.4.3)\n", | |
"Requirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.6/dist-packages (from tensorboardx->simpletransformers) (3.12.4)\n", | |
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from tensorboardx->simpletransformers) (1.15.0)\n", | |
"Collecting docker-pycreds>=0.4.0\n", | |
" Downloading https://files.pythonhosted.org/packages/f5/e8/f6bd1eee09314e7e6dee49cbe2c5e22314ccdb38db16c9fc72d2fa80d054/docker_pycreds-0.4.0-py2.py3-none-any.whl\n", | |
"Collecting GitPython>=1.0.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/f9/1e/a45320cab182bf1c8656107b3d4c042e659742822fc6bff150d769a984dd/GitPython-3.1.7-py3-none-any.whl (158kB)\n", | |
"\u001b[K |████████████████████████████████| 163kB 41.4MB/s \n", | |
"\u001b[?25hCollecting watchdog>=0.8.3\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/0e/06/121302598a4fc01aca942d937f4a2c33430b7181137b35758913a8db10ad/watchdog-0.10.3.tar.gz (94kB)\n", | |
"\u001b[K |████████████████████████████████| 102kB 10.1MB/s \n", | |
"\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.6/dist-packages (from wandb->simpletransformers) (5.4.8)\n", | |
"Collecting sentry-sdk>=0.4.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/4b/23/811fcdfc9d67fea7e47c91dd553081218d53dda744c28384f4d2f69206c9/sentry_sdk-0.16.3-py2.py3-none-any.whl (110kB)\n", | |
"\u001b[K |████████████████████████████████| 112kB 37.7MB/s \n", | |
"\u001b[?25hRequirement already satisfied: PyYAML>=3.10 in /usr/local/lib/python3.6/dist-packages (from wandb->simpletransformers) (3.13)\n", | |
"Requirement already satisfied: Click>=7.0 in /usr/local/lib/python3.6/dist-packages (from wandb->simpletransformers) (7.1.2)\n", | |
"Collecting gql==0.2.0\n", | |
" Downloading https://files.pythonhosted.org/packages/c4/6f/cf9a3056045518f06184e804bae89390eb706168349daa9dff8ac609962a/gql-0.2.0.tar.gz\n", | |
"Collecting configparser>=3.8.1\n", | |
" Downloading https://files.pythonhosted.org/packages/4b/6b/01baa293090240cf0562cc5eccb69c6f5006282127f2b846fad011305c79/configparser-5.0.0-py3-none-any.whl\n", | |
"Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.6/dist-packages (from wandb->simpletransformers) (7.352.0)\n", | |
"Collecting subprocess32>=3.5.3\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/32/c8/564be4d12629b912ea431f1a50eb8b3b9d00f1a0b1ceff17f266be190007/subprocess32-3.5.4.tar.gz (97kB)\n", | |
"\u001b[K |████████████████████████████████| 102kB 10.5MB/s \n", | |
"\u001b[?25hCollecting shortuuid>=0.5.0\n", | |
" Downloading https://files.pythonhosted.org/packages/25/a6/2ecc1daa6a304e7f1b216f0896b26156b78e7c38e1211e9b798b4716c53d/shortuuid-1.0.1-py3-none-any.whl\n", | |
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (3.0.4)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (2020.6.20)\n", | |
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (2.10)\n", | |
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (1.24.3)\n", | |
"Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (3.0.12)\n", | |
"Requirement already satisfied: sentencepiece!=0.1.92 in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (0.1.91)\n", | |
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (0.0.43)\n", | |
"Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (20.4)\n", | |
"Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (0.7)\n", | |
"Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->simpletransformers) (2.10.0)\n", | |
"Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.8.0->tensorboardx->simpletransformers) (49.2.0)\n", | |
"Collecting gitdb<5,>=4.0.1\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/48/11/d1800bca0a3bae820b84b7d813ad1eff15a48a64caea9c823fc8c1b119e8/gitdb-4.0.5-py3-none-any.whl (63kB)\n", | |
"\u001b[K |████████████████████████████████| 71kB 8.5MB/s \n", | |
"\u001b[?25hCollecting pathtools>=0.1.1\n", | |
" Downloading https://files.pythonhosted.org/packages/e7/7f/470d6fcdf23f9f3518f6b0b76be9df16dcc8630ad409947f8be2eb0ed13a/pathtools-0.1.2.tar.gz\n", | |
"Collecting graphql-core<2,>=0.5.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/b0/89/00ad5e07524d8c523b14d70c685e0299a8b0de6d0727e368c41b89b7ed0b/graphql-core-1.1.tar.gz (70kB)\n", | |
"\u001b[K |████████████████████████████████| 71kB 7.5MB/s \n", | |
"\u001b[?25hRequirement already satisfied: promise<3,>=2.0 in /usr/local/lib/python3.6/dist-packages (from gql==0.2.0->wandb->simpletransformers) (2.3)\n", | |
"Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers>=3.0.2->simpletransformers) (2.4.7)\n", | |
"Collecting smmap<4,>=3.0.1\n", | |
" Downloading https://files.pythonhosted.org/packages/b0/9a/4d409a6234eb940e6a78dfdfc66156e7522262f5f2fecca07dc55915952d/smmap-3.0.4-py2.py3-none-any.whl\n", | |
"Building wheels for collected packages: seqeval, watchdog, gql, subprocess32, pathtools, graphql-core\n", | |
" Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for seqeval: filename=seqeval-0.0.12-cp36-none-any.whl size=7424 sha256=2474d10796e1cf6a39b0ceb57b0a74e039821f280a8f929724edcb71e1f64232\n", | |
" Stored in directory: /root/.cache/pip/wheels/4f/32/0a/df3b340a82583566975377d65e724895b3fad101a3fb729f68\n", | |
" Building wheel for watchdog (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for watchdog: filename=watchdog-0.10.3-cp36-none-any.whl size=73870 sha256=d3c9ad43d054be0e4b17eeb0395e98cc751f642a2b22ba85422f9c6711d3b1c7\n", | |
" Stored in directory: /root/.cache/pip/wheels/a8/1d/38/2c19bb311f67cc7b4d07a2ec5ea36ab1a0a0ea50db994a5bc7\n", | |
" Building wheel for gql (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for gql: filename=gql-0.2.0-cp36-none-any.whl size=7630 sha256=1ce7763303309ef6f96807f63b917a94d6a610bbc9cc456898f457e2e421af4e\n", | |
" Stored in directory: /root/.cache/pip/wheels/ce/0e/7b/58a8a5268655b3ad74feef5aa97946f0addafb3cbb6bd2da23\n", | |
" Building wheel for subprocess32 (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for subprocess32: filename=subprocess32-3.5.4-cp36-none-any.whl size=6489 sha256=934e338286a1eb4ca3a184c289fceb880c637eaa4e195efa14a4965b179e706a\n", | |
" Stored in directory: /root/.cache/pip/wheels/68/39/1a/5e402bdfdf004af1786c8b853fd92f8c4a04f22aad179654d1\n", | |
" Building wheel for pathtools (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for pathtools: filename=pathtools-0.1.2-cp36-none-any.whl size=8784 sha256=47a4319d7df7efb82a6ce887a7b938f558bbf7159e7bf41788c023a64b0616a5\n", | |
" Stored in directory: /root/.cache/pip/wheels/0b/04/79/c3b0c3a0266a3cb4376da31e5bfe8bba0c489246968a68e843\n", | |
" Building wheel for graphql-core (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for graphql-core: filename=graphql_core-1.1-cp36-none-any.whl size=104650 sha256=337dfd6e587a3f9622a4bf6611e6781265f9f8bd6ec84a6d042505cc2683a9e8\n", | |
" Stored in directory: /root/.cache/pip/wheels/45/99/d7/c424029bb0fe910c63b68dbf2aa20d3283d023042521bcd7d5\n", | |
"Successfully built seqeval watchdog gql subprocess32 pathtools graphql-core\n", | |
"Installing collected packages: seqeval, tensorboardx, docker-pycreds, smmap, gitdb, GitPython, pathtools, watchdog, sentry-sdk, graphql-core, gql, configparser, subprocess32, shortuuid, wandb, tqdm, simpletransformers\n", | |
" Found existing installation: tqdm 4.41.1\n", | |
" Uninstalling tqdm-4.41.1:\n", | |
" Successfully uninstalled tqdm-4.41.1\n", | |
"Successfully installed GitPython-3.1.7 configparser-5.0.0 docker-pycreds-0.4.0 gitdb-4.0.5 gql-0.2.0 graphql-core-1.1 pathtools-0.1.2 sentry-sdk-0.16.3 seqeval-0.0.12 shortuuid-1.0.1 simpletransformers-0.46.6 smmap-3.0.4 subprocess32-3.5.4 tensorboardx-2.1 tqdm-4.48.2 wandb-0.9.4 watchdog-0.10.3\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "vuoAC5cuv766", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Pre-processing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qzEgLg_uN1im", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 87 | |
}, | |
"outputId": "deedd88f-5dcd-43b4-ecdf-b4713fe31fb2" | |
}, | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import seaborn as sns\n", | |
"from sklearn.metrics import f1_score\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"from simpletransformers.classification import ClassificationModel, ClassificationArgs" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", | |
" import pandas.util.testing as tm\n", | |
"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "3nyCXC4mOMYK", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train = pd.read_csv('/content/train.csv')\n", | |
"test = pd.read_csv('/content/test.csv')\n", | |
"sample_sub = pd.read_csv('/content/sample_submission.csv')" | |
], | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5Ve200-QOb-n", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 195 | |
}, | |
"outputId": "cea71ae5-cd65-4a02-bcca-e628474bdcef" | |
}, | |
"source": [ | |
"train.head()" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>keyword</th>\n", | |
" <th>location</th>\n", | |
" <th>text</th>\n", | |
" <th>target</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Our Deeds are the Reason of this #earthquake M...</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Forest fire near La Ronge Sask. Canada</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>5</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>All residents asked to 'shelter in place' are ...</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>6</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13,000 people receive #wildfires evacuation or...</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>7</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Just got sent this photo from Ruby #Alaska as ...</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id keyword ... text target\n", | |
"0 1 NaN ... Our Deeds are the Reason of this #earthquake M... 1\n", | |
"1 4 NaN ... Forest fire near La Ronge Sask. Canada 1\n", | |
"2 5 NaN ... All residents asked to 'shelter in place' are ... 1\n", | |
"3 6 NaN ... 13,000 people receive #wildfires evacuation or... 1\n", | |
"4 7 NaN ... Just got sent this photo from Ruby #Alaska as ... 1\n", | |
"\n", | |
"[5 rows x 5 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 6 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "fvWVghC01pdB", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 195 | |
}, | |
"outputId": "2a1fc352-943f-47bb-d50d-100ecb0745c3" | |
}, | |
"source": [ | |
"sample_sub.head()" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>target</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>9</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>11</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id target\n", | |
"0 0 0\n", | |
"1 2 0\n", | |
"2 3 0\n", | |
"3 9 0\n", | |
"4 11 0" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OpRGL9uwEo5p", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 50 | |
}, | |
"outputId": "ab9a3175-f799-4a84-a15e-15c3724a24b8" | |
}, | |
"source": [ | |
"print('Shape of train set {}'.format(train.shape))\n", | |
"print('Shape of test set {}'.format(test.shape))" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Shape of train set (7613, 5)\n", | |
"Shape of test set (3263, 4)\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "7RSBpvP2ssKQ", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train.drop(['id', 'keyword', 'location'], axis=1, inplace=True)\n", | |
"test.drop(['id', 'keyword', 'location'], axis=1, inplace=True)" | |
], | |
"execution_count": 9, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "aCCjVicXVbXb", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "6400c98f-6154-416c-aa99-6e690f0741f9" | |
}, | |
"source": [ | |
"train.isnull().sum().sum()\n", | |
"test.isnull().sum().sum()" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "f0OI9Pm3ZE_u", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 67 | |
}, | |
"outputId": "d0e8701f-f473-48cd-ffb1-27ecc8dbc0ad" | |
}, | |
"source": [ | |
"train['target'].value_counts()" | |
], | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0 4342\n", | |
"1 3271\n", | |
"Name: target, dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 11 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "VMSFXUZZYgwz", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 279 | |
}, | |
"outputId": "e89b4b6c-29b3-4b96-990d-937f5d1f7963" | |
}, | |
"source": [ | |
"sns.countplot(train['target']);" | |
], | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAPJklEQVR4nO3de+zddX3H8eeLFmTGS9H+xrRllmizpW6K2gHTZNkgg8rUEhWD0dG5Zt0ytmiyuOGyjImyaObGvEyTZlQLWUTUbSBxMQ3izIxcWlEuZYSfF0YbtJVy8RLYiu/9cT7VH6W/fg6l51J+z0dy0u/38/1+z+/zSwrPnvP9nu9JVSFJ0sEcNekJSJKmn7GQJHUZC0lSl7GQJHUZC0lS1+JJT2AUli5dWitWrJj0NCTpiLJt27bvV9XMgbY9JWOxYsUKtm7dOulpSNIRJcnd823zbShJUpexkCR1GQtJUpexkCR1GQtJUpexkCR1GQtJUpexkCR1GQtJUtdT8hPch8Mr3nnZpKegKbTt786b9BSkifCVhSSpy1hIkrqMhSSpy1hIkrqMhSSpy1hIkrqMhSSpy1hIkrqMhSSpy1hIkrqMhSSpy1hIkrpGHoski5LcnOSatn5ikhuSzCb5VJJj2vjT2vps275iznO8q43fmeTMUc9ZkvRY43hl8Xbgjjnr7wcuqaoXAfcD69v4euD+Nn5J248kq4BzgRcDa4CPJlk0hnlLkpqRxiLJcuB3gH9u6wFOAz7TdtkMnN2W17Z12vbT2/5rgSuq6pGq+jYwC5w8ynlLkh5r1K8s/hH4c+Anbf25wANVtbet7wCWteVlwD0AbfuDbf+fjh/gmJ9KsiHJ1iRbd+/efbh/D0la0EYWiySvAXZV1bZR/Yy5qmpjVa2uqtUzMzPj+JGStGCM8pvyXgW8LslZwLHAs4APAkuSLG6vHpYDO9v+O4ETgB1JFgPPBu6bM77P3GMkSWMwslcWVfWuqlpeVSsYnKD+YlW9BbgOeGPbbR1wVVu+uq3Ttn+xqqqNn9uuljoRWAncOKp5S5IebxLfwf0XwBVJ3gvcDFzaxi8FLk8yC+xhEBiq6vYkVwLbgb3A+VX16PinLUkL11hiUVVfAr7Ulr/FAa5mqqqHgXPmOf5i4OLRzVCSdDB+gluS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1LV40hOQ9MT8z0W/OukpaAr94l/fOtLn95WFJKnLWEiSuoyFJKnLWEiSuoyFJKnLWEiSuoyFJKnLWEiSuoyFJKlrZLFIcmySG5N8I8ntSd7dxk9MckOS2SSfSnJMG39aW59t21fMea53tfE7k5w5qjlLkg5slK8sHgFOq6qXAicBa5KcCrwfuKSqXgTcD6xv+68H7m/jl7T9SLIKOBd4MbAG+GiSRSOctyRpPyOLRQ38sK0e3R4FnAZ8po1vBs5uy2vbOm376UnSxq+oqkeq6tvALHDyqOYtSXq8kZ6zSLIoydeBXcAW4JvAA1W1t+2yA1jWlpcB9wC07Q8Cz507foBj5v6sDUm2Jtm6e/fuUfw6krRgjTQWVfVoVZ0ELGfwauCXR/izNlbV6qpaPTMzM6ofI0kL0liuhqqqB4DrgF8HliTZd2v05cDOtrwTOAGgbX82cN/c8QMcI0kag1FeDTWTZElb/jngt4E7GETjjW23dcBVbfnqtk7b/sWqqjZ+brta6kRgJXDjqOYtSXq8UX750fOAze3KpaOAK6vqmiTbgSuSvBe4Gbi07X8pcHmSWWAPgyugqKrbk1wJbAf2AudX1aMjnLckaT8ji0VV3QK87ADj3+IAVzNV1cPAOfM818XAxYd7jpKk4fgJbklSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lS11CxSHLtMGOSpKemg36tapJjgacDS5McB6RtehawbMRzkyRNid53cP8h8A7g+cA2fhaLh4CPjHBekqQpctBYVNUHgQ8m+dOq+vCY5iRJmjK9VxYAVNWHk7wSWDH3mKq6bETzkiRNkaFikeRy4IXA14FH23ABxkKSFoChYgGsBlZVVY1yMpKk6TTs5yxuA35hlBORJE2vYV9ZLAW2J7kReGTfYFW9biSzkiRNlWFj8TejnIQkaboNezXUf456IpKk6TXs1VA/YHD1E8AxwNHAj6rqWaOamCRpegz7yuKZ+5aTBFgLnDqqSUmSpssTvutsDfw7cOYI5iNJmkLDvg31+jmrRzH43MXDI5mRJGnqDHs11GvnLO8FvsPgrShJ0gIw7DmLt416IpKk6TXslx8tT/JvSXa1x2eTLB/15CRJ02HYE9wfB65m8L0Wzwc+18YkSQvAsLGYqaqPV9Xe9vgEMDPCeUmSpsiwsbgvyVuTLGqPtwL3jXJikqTpMWwsfh94E/Bd4F7gjcDvHeyAJCckuS7J9iS3J3l7G39Oki1J7mp/HtfGk+RDSWaT3JLk5XOea13b/64k6w7h95QkPQnDxuIiYF1VzVTVzzOIx7s7x+wF/qyqVjH4tPf5SVYBFwDXVtVK4Nq2DvBqYGV7bAA+BoO4ABcCpwAnAxfuC4wkaTyGjcVLqur+fStVtQd42cEOqKp7q+prbfkHwB3AMgafz9jcdtsMnN2W1wKXtU+IXw8sSfI8Bp8U31JVe9octgBrhpy3JOkwGDYWR83913z71/6wH+gjyQoGcbkBOL6q7m2bvgsc35aXAffMOWxHG5tvfP+fsSHJ1iRbd+/ePezUJElDGPZ/+H8PfDXJp9v6OcDFwxyY5BnAZ4F3VNVDg/sQDlRVJTksX9VaVRuBjQCrV6/2618l6TAa6pVFVV0GvB74Xnu8vqou7x2X5GgGofiXqvrXNvy99vYS7c9dbXwncMKcw5e3sfnGJUljMvRdZ6tqe1V9pD229/ZvtzK/FLijqv5hzqargX1XNK0Drpozfl67KupU4MH2dtUXgDOSHNfeCjujjUmSxmTo8w6H4FXA7wK3Jvl6G/tL4H3AlUnWA3czuCQX4PPAWcAs8GPgbTA4mZ7kPcBNbb+L2gl2SdKYjCwWVfVfQObZfPoB9i/g/HmeaxOw6fDNTpL0RDzhLz+SJC08xkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldI4tFkk1JdiW5bc7Yc5JsSXJX+/O4Np4kH0oym+SWJC+fc8y6tv9dSdaNar6SpPmN8pXFJ4A1+41dAFxbVSuBa9s6wKuBle2xAfgYDOICXAicApwMXLgvMJKk8RlZLKrqy8Ce/YbXApvb8mbg7Dnjl9XA9cCSJM8DzgS2VNWeqrof2MLjAyRJGrFxn7M4vqrubcvfBY5vy8uAe+bst6ONzTf+OEk2JNmaZOvu3bsP76wlaYGb2AnuqiqgDuPzbayq1VW1emZm5nA9rSSJ8cfie+3tJdqfu9r4TuCEOfstb2PzjUuSxmjcsbga2HdF0zrgqjnj57Wrok4FHmxvV30BOCPJce3E9hltTJI0RotH9cRJPgn8JrA0yQ4GVzW9D7gyyXrgbuBNbffPA2cBs8CPgbcBVNWeJO8Bbmr7XVRV+580lySN2MhiUVVvnmfT6QfYt4Dz53meTcCmwzg1SdIT5Ce4JUldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldR0wskqxJcmeS2SQXTHo+krSQHBGxSLII+Cfg1cAq4M1JVk12VpK0cBwRsQBOBmar6ltV9b/AFcDaCc9JkhaMxZOewJCWAffMWd8BnDJ3hyQbgA1t9YdJ7hzT3BaCpcD3Jz2JaZAPrJv0FPRY/t3c58Icjmd5wXwbjpRYdFXVRmDjpOfxVJRka1WtnvQ8pP35d3N8jpS3oXYCJ8xZX97GJEljcKTE4iZgZZITkxwDnAtcPeE5SdKCcUS8DVVVe5P8CfAFYBGwqapun/C0FhLf3tO08u/mmKSqJj0HSdKUO1LehpIkTZCxkCR1GQsdlLdZ0TRKsinJriS3TXouC4Wx0Ly8zYqm2CeANZOexEJiLHQw3mZFU6mqvgzsmfQ8FhJjoYM50G1Wlk1oLpImyFhIkrqMhQ7G26xIAoyFDs7brEgCjIUOoqr2Avtus3IHcKW3WdE0SPJJ4KvALyXZkWT9pOf0VOftPiRJXb6ykCR1GQtJUpexkCR1GQtJUpexkCR1GQvpECRZkuSPx/BzzvbmjZoGxkI6NEuAoWORgUP57+1sBnf8lSbKz1lIhyDJvjvw3glcB7wEOA44GvirqroqyQoGH2i8AXgFcBZwHvBWYDeDmzRuq6oPJHkhg9vBzwA/Bv4AeA5wDfBge7yhqr45pl9ReozFk56AdIS6APiVqjopyWLg6VX1UJKlwPVJ9t0WZSWwrqquT/JrwBuAlzKIyteAbW2/jcAfVdVdSU4BPlpVp7XnuaaqPjPOX07an7GQnrwAf5vkN4CfMLiN+/Ft291VdX1bfhVwVVU9DDyc5HMASZ4BvBL4dJJ9z/m0cU1eGoaxkJ68tzB4++gVVfV/Sb4DHNu2/WiI448CHqiqk0Y0P+lJ8wS3dGh+ADyzLT8b2NVC8VvAC+Y55ivAa5Mc215NvAagqh4Cvp3kHPjpyfCXHuDnSBNjLKRDUFX3AV9JchtwErA6ya0MTmD/9zzH3MTgFu+3AP8B3MrgxDUMXp2sT/IN4HZ+9vW1VwDvTHJzOwkuTYRXQ0ljlOQZVfXDJE8HvgxsqKqvTXpeUo/nLKTx2tg+ZHcssNlQ6EjhKwtJUpfnLCRJXcZCktRlLCRJXcZCktRlLCRJXf8PSJ+98yzhfZMAAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"tags": [], | |
"needs_background": "light" | |
} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "lTb5QrxTqSJo", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train.columns = ['text', 'labels']" | |
], | |
"execution_count": 13, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qEK9n5wcqeC6", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"train_df, valid_df = train_test_split(train, test_size=0.2, stratify=train['labels'], random_state=42)" | |
], | |
"execution_count": 15, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "rKT3QDBVwDWg", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Initialize a ClassificationModel" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "DAOGxHsZYsaX", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 316, | |
"referenced_widgets": [ | |
"b455d75f7a104923b1bf6418de35860b", | |
"7c3428e7e9644b72a6313330621bb1af", | |
"d12d68cf6a7f4c97a55f9c983b7bd574", | |
"3d58054c927a4c3cb52fd97080bbab12", | |
"0986f24d906749e69106d5f2283567ee", | |
"88f72c3b962e45f3a3024c2d0b90f56b", | |
"f874e0e8067d4762bf28fabe2d556d0f", | |
"03dba9bb65c24669b7972e5225132dc4", | |
"edeb7d88a6b34ccc9fab83ec1e7b485a", | |
"d63eb26f60e9444a8af68b104bf16162", | |
"39bc166e56604240bc8606854e82d5d7", | |
"54c0d85ba4db49e79f4119f69db3986e", | |
"e4e6bf931212411b9f377e6b745b6684", | |
"e13361f355884900ba31a96a5275952e", | |
"de23c801a15d4b9e9387920168832916", | |
"c529d3191c74443e8bafea48ac6c0578", | |
"a189510e95d54ab8b731e4dbe4c251bb", | |
"ac22d5f851d946719bf131c05b28281b", | |
"d9ad0eea6c68467b952925e9eda85446", | |
"f4805482ed8f4fa0b091473b92029a76", | |
"efadb3c4c30c4ca2a8fcf407cf5c9058", | |
"5e858916b1cf4ca7a341110170f02b2b", | |
"a5dafe4ae9b94ca19e7acba889cf6ef7", | |
"a7f38f11bcb749f09e0be41f02bb7298", | |
"d83a56f2745e427c906227391c2c41f0", | |
"c73d993bafca430cb37a9e799e9934aa", | |
"f69264e66fd14f71ab1ed0885b502f8c", | |
"3e6fd1d18b284bbfb7b89aefa2325e66", | |
"791efcd9e33841238e644e10929f45a9", | |
"e2ca11d7be5a4a0bbc21fe36c760ccfc", | |
"5967caa8f12245b9b82ad2b4b7c82748", | |
"5b025b236224462ebc72d5dbe29999a3" | |
] | |
}, | |
"outputId": "c5063809-7fa3-4353-a63e-08de055dcdf9" | |
}, | |
"source": [ | |
"model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir=True, manual_seed=42, silent=True)\n", | |
"\n", | |
"model = ClassificationModel(model_type='roberta', model_name='roberta-base', use_cuda=True, num_labels=2, \n", | |
" args=model_args)" | |
], | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "b455d75f7a104923b1bf6418de35860b", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=481.0, style=ProgressStyle(description_…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "edeb7d88a6b34ccc9fab83ec1e7b485a", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=501200538.0, style=ProgressStyle(descri…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']\n", | |
"- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n", | |
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", | |
"Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']\n", | |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "a189510e95d54ab8b731e4dbe4c251bb", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "d83a56f2745e427c906227391c2c41f0", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "anfYszv6wH7c", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Train the model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "AtMxAR-dbwpD", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 104 | |
}, | |
"outputId": "74faa44e-e662-4fdf-fd08-ca8a45ef0bcb" | |
}, | |
"source": [ | |
"model.train_model(train_df)" | |
], | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/torch/optim/lr_scheduler.py:231: UserWarning: To get the last learning rate computed by the scheduler, please use `get_last_lr()`.\n", | |
" warnings.warn(\"To get the last learning rate computed by the scheduler, \"\n", | |
"/usr/local/lib/python3.6/dist-packages/torch/optim/lr_scheduler.py:200: UserWarning: Please also save or load the state of the optimzer when saving or loading the scheduler.\n", | |
" warnings.warn(SAVE_STATE_WARNING, UserWarning)\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "XriLtv5ewMe0", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Evaluate the model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "1pP36pI0qGl0", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"result, model_outputs, wrong_predictions = model.eval_model(valid_df)" | |
], | |
"execution_count": 18, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "XmMA1clU_ZYH", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 118 | |
}, | |
"outputId": "093d9136-61fb-4373-ae83-7b80585a0912" | |
}, | |
"source": [ | |
"result" | |
], | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'eval_loss': 0.41687825866316625,\n", | |
" 'fn': 120,\n", | |
" 'fp': 125,\n", | |
" 'mcc': 0.6720497332586948,\n", | |
" 'tn': 744,\n", | |
" 'tp': 534}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 19 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OcEGM23OBk7F", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "7f0c6edc-6702-470f-db02-8f269f66528c" | |
}, | |
"source": [ | |
"predictions = []\n", | |
"for x in model_outputs:\n", | |
" predictions.append(np.argmax(x))\n", | |
"\n", | |
"print('f1 score:', f1_score(valid_df['labels'], predictions))" | |
], | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"f1 score: 0.8134044173648134\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "M0Kblsp1wqO1", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"## Prediction on test set" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "CXhonaYc0KbT", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"test_predictions, raw_outputs = model.predict(test['text'])" | |
], | |
"execution_count": 21, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "8qCvpqRO1cl_", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"sample_sub['target'] = test_predictions" | |
], | |
"execution_count": 22, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "gGCcLBPT0YHm", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"sample_sub.to_csv('submission.csv', index=False)" | |
], | |
"execution_count": 23, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Bs17ReWN100v", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 17 | |
}, | |
"outputId": "49c50956-495f-4262-b8d3-efd1b25e43c6" | |
}, | |
"source": [ | |
"files.download('submission.csv')" | |
], | |
"execution_count": 24, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/javascript": [ | |
"\n", | |
" async function download(id, filename, size) {\n", | |
" if (!google.colab.kernel.accessAllowed) {\n", | |
" return;\n", | |
" }\n", | |
" const div = document.createElement('div');\n", | |
" const label = document.createElement('label');\n", | |
" label.textContent = `Downloading \"${filename}\": `;\n", | |
" div.appendChild(label);\n", | |
" const progress = document.createElement('progress');\n", | |
" progress.max = size;\n", | |
" div.appendChild(progress);\n", | |
" document.body.appendChild(div);\n", | |
"\n", | |
" const buffers = [];\n", | |
" let downloaded = 0;\n", | |
"\n", | |
" const channel = await google.colab.kernel.comms.open(id);\n", | |
" // Send a message to notify the kernel that we're ready.\n", | |
" channel.send({})\n", | |
"\n", | |
" for await (const message of channel.messages) {\n", | |
" // Send a message to notify the kernel that we're ready.\n", | |
" channel.send({})\n", | |
" if (message.buffers) {\n", | |
" for (const buffer of message.buffers) {\n", | |
" buffers.push(buffer);\n", | |
" downloaded += buffer.byteLength;\n", | |
" progress.value = downloaded;\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" const blob = new Blob(buffers, {type: 'application/binary'});\n", | |
" const a = document.createElement('a');\n", | |
" a.href = window.URL.createObjectURL(blob);\n", | |
" a.download = filename;\n", | |
" div.appendChild(a);\n", | |
" a.click();\n", | |
" div.remove();\n", | |
" }\n", | |
" " | |
], | |
"text/plain": [ | |
"<IPython.core.display.Javascript object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/javascript": [ | |
"download(\"download_a10074c6-5116-472c-a285-7b39b3c55c43\", \"submission.csv\", 22746)" | |
], | |
"text/plain": [ | |
"<IPython.core.display.Javascript object>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment