Skip to content

Instantly share code, notes, and snippets.

@chetanambi
Created August 8, 2020 16:55
Show Gist options
  • Save chetanambi/bfb580b00422b3184e758596d2f67759 to your computer and use it in GitHub Desktop.
Save chetanambi/bfb580b00422b3184e758596d2f67759 to your computer and use it in GitHub Desktop.
NLP_with_Disaster_Tweets
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "NLP with Disaster Tweets.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"b455d75f7a104923b1bf6418de35860b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_7c3428e7e9644b72a6313330621bb1af",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_d12d68cf6a7f4c97a55f9c983b7bd574",
"IPY_MODEL_3d58054c927a4c3cb52fd97080bbab12"
]
}
},
"7c3428e7e9644b72a6313330621bb1af": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"d12d68cf6a7f4c97a55f9c983b7bd574": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_0986f24d906749e69106d5f2283567ee",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 481,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 481,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_88f72c3b962e45f3a3024c2d0b90f56b"
}
},
"3d58054c927a4c3cb52fd97080bbab12": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_f874e0e8067d4762bf28fabe2d556d0f",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 481/481 [00:00<00:00, 1.55kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_03dba9bb65c24669b7972e5225132dc4"
}
},
"0986f24d906749e69106d5f2283567ee": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"88f72c3b962e45f3a3024c2d0b90f56b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"f874e0e8067d4762bf28fabe2d556d0f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"03dba9bb65c24669b7972e5225132dc4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"edeb7d88a6b34ccc9fab83ec1e7b485a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_d63eb26f60e9444a8af68b104bf16162",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_39bc166e56604240bc8606854e82d5d7",
"IPY_MODEL_54c0d85ba4db49e79f4119f69db3986e"
]
}
},
"d63eb26f60e9444a8af68b104bf16162": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"39bc166e56604240bc8606854e82d5d7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_e4e6bf931212411b9f377e6b745b6684",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 501200538,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 501200538,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_e13361f355884900ba31a96a5275952e"
}
},
"54c0d85ba4db49e79f4119f69db3986e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_de23c801a15d4b9e9387920168832916",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 501M/501M [00:19<00:00, 25.5MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_c529d3191c74443e8bafea48ac6c0578"
}
},
"e4e6bf931212411b9f377e6b745b6684": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"e13361f355884900ba31a96a5275952e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"de23c801a15d4b9e9387920168832916": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"c529d3191c74443e8bafea48ac6c0578": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"a189510e95d54ab8b731e4dbe4c251bb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_ac22d5f851d946719bf131c05b28281b",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_d9ad0eea6c68467b952925e9eda85446",
"IPY_MODEL_f4805482ed8f4fa0b091473b92029a76"
]
}
},
"ac22d5f851d946719bf131c05b28281b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"d9ad0eea6c68467b952925e9eda85446": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_efadb3c4c30c4ca2a8fcf407cf5c9058",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 898823,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 898823,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_5e858916b1cf4ca7a341110170f02b2b"
}
},
"f4805482ed8f4fa0b091473b92029a76": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_a5dafe4ae9b94ca19e7acba889cf6ef7",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 899k/899k [00:01<00:00, 563kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_a7f38f11bcb749f09e0be41f02bb7298"
}
},
"efadb3c4c30c4ca2a8fcf407cf5c9058": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"5e858916b1cf4ca7a341110170f02b2b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"a5dafe4ae9b94ca19e7acba889cf6ef7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"a7f38f11bcb749f09e0be41f02bb7298": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"d83a56f2745e427c906227391c2c41f0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_c73d993bafca430cb37a9e799e9934aa",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_f69264e66fd14f71ab1ed0885b502f8c",
"IPY_MODEL_3e6fd1d18b284bbfb7b89aefa2325e66"
]
}
},
"c73d993bafca430cb37a9e799e9934aa": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"f69264e66fd14f71ab1ed0885b502f8c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_791efcd9e33841238e644e10929f45a9",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 456318,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 456318,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_e2ca11d7be5a4a0bbc21fe36c760ccfc"
}
},
"3e6fd1d18b284bbfb7b89aefa2325e66": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_5967caa8f12245b9b82ad2b4b7c82748",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 456k/456k [00:00<00:00, 1.08MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_5b025b236224462ebc72d5dbe29999a3"
}
},
"791efcd9e33841238e644e10929f45a9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"e2ca11d7be5a4a0bbc21fe36c760ccfc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"5967caa8f12245b9b82ad2b4b7c82748": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"5b025b236224462ebc72d5dbe29999a3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "12Gx5UyPvWzH",
"colab_type": "text"
},
"source": [
"# Real or Not? NLP with Disaster Tweets with Simple Transformers"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "FMPERLKFvi87",
"colab_type": "text"
},
"source": [
"## Import libraries and datasets"
]
},
{
"cell_type": "code",
"metadata": {
"id": "X_gJ7ANDM87h",
"colab_type": "code",
"colab": {
"resources": {
"http://localhost:8080/nbextensions/google.colab/files.js": {
"data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7CgpmdW5jdGlvbiBfdXBsb2FkRmlsZXMoaW5wdXRJZCwgb3V0cHV0SWQpIHsKICBjb25zdCBzdGVwcyA9IHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCk7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICAvLyBDYWNoZSBzdGVwcyBvbiB0aGUgb3V0cHV0RWxlbWVudCB0byBtYWtlIGl0IGF2YWlsYWJsZSBmb3IgdGhlIG5leHQgY2FsbAogIC8vIHRvIHVwbG9hZEZpbGVzQ29udGludWUgZnJvbSBQeXRob24uCiAgb3V0cHV0RWxlbWVudC5zdGVwcyA9IHN0ZXBzOwoKICByZXR1cm4gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpOwp9CgovLyBUaGlzIGlzIHJvdWdobHkgYW4gYXN5bmMgZ2VuZXJhdG9yIChub3Qgc3VwcG9ydGVkIGluIHRoZSBicm93c2VyIHlldCksCi8vIHdoZXJlIHRoZXJlIGFyZSBtdWx0aXBsZSBhc3luY2hyb25vdXMgc3RlcHMgYW5kIHRoZSBQeXRob24gc2lkZSBpcyBnb2luZwovLyB0byBwb2xsIGZvciBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcC4KLy8gVGhpcyB1c2VzIGEgUHJvbWlzZSB0byBibG9jayB0aGUgcHl0aG9uIHNpZGUgb24gY29tcGxldGlvbiBvZiBlYWNoIHN0ZXAsCi8vIHRoZW4gcGFzc2VzIHRoZSByZXN1bHQgb2YgdGhlIHByZXZpb3VzIHN0ZXAgYXMgdGhlIGlucHV0IHRvIHRoZSBuZXh0IHN0ZXAuCmZ1bmN0aW9uIF91cGxvYWRGaWxlc0NvbnRpbnVlKG91dHB1dElkKSB7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICBjb25zdCBzdGVwcyA9IG91dHB1dEVsZW1lbnQuc3RlcHM7CgogIGNvbnN0IG5leHQgPSBzdGVwcy5uZXh0KG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSk7CiAgcmV0dXJuIFByb21pc2UucmVzb2x2ZShuZXh0LnZhbHVlLnByb21pc2UpLnRoZW4oKHZhbHVlKSA9PiB7CiAgICAvLyBDYWNoZSB0aGUgbGFzdCBwcm9taXNlIHZhbHVlIHRvIG1ha2UgaXQgYXZhaWxhYmxlIHRvIHRoZSBuZXh0CiAgICAvLyBzdGVwIG9mIHRoZSBnZW5lcmF0b3IuCiAgICBvdXRwdXRFbGVtZW50Lmxhc3RQcm9taXNlVmFsdWUgPSB2YWx1ZTsKICAgIHJldHVybiBuZXh0LnZhbHVlLnJlc3BvbnNlOwogIH0pOwp9CgovKioKICogR2VuZXJhdG9yIGZ1bmN0aW9uIHdoaWNoIGlzIGNhbGxlZCBiZXR3ZWVuIGVhY2ggYXN5bmMgc3RlcCBvZiB0aGUgdXBsb2FkCiAqIHByb2Nlc3MuCiAqIEBwYXJhbSB7c3RyaW5nfSBpbnB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIGlucHV0IGZpbGUgcGlja2VyIGVsZW1lbnQuCiAqIEBwYXJhbSB7c3RyaW5nfSBvdXRwdXRJZCBFbGVtZW50IElEIG9mIHRoZSBvdXRwdXQgZGlzcGxheS4KICogQHJldHVybiB7IUl0ZXJhYmxlPCFPYmplY3Q+fSBJdGVyYWJsZSBvZiBuZXh0IHN0ZXBzLgogKi8KZnVuY3Rpb24qIHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IGlucHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKGlucHV0SWQpOwogIGlucHV0RWxlbWVudC5kaXNhYmxlZCA9IGZhbHNlOwoKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIG91dHB1dEVsZW1lbnQuaW5uZXJIVE1MID0gJyc7CgogIGNvbnN0IHBpY2tlZFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgaW5wdXRFbGVtZW50LmFkZEV2ZW50TGlzdGVuZXIoJ2NoYW5nZScsIChlKSA9PiB7CiAgICAgIHJlc29sdmUoZS50YXJnZXQuZmlsZXMpOwogICAgfSk7CiAgfSk7CgogIGNvbnN0IGNhbmNlbCA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2J1dHRvbicpOwogIGlucHV0RWxlbWVudC5wYXJlbnRFbGVtZW50LmFwcGVuZENoaWxkKGNhbmNlbCk7CiAgY2FuY2VsLnRleHRDb250ZW50ID0gJ0NhbmNlbCB1cGxvYWQnOwogIGNvbnN0IGNhbmNlbFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgY2FuY2VsLm9uY2xpY2sgPSAoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9OwogIH0pOwoKICAvLyBXYWl0IGZvciB0aGUgdXNlciB0byBwaWNrIHRoZSBmaWxlcy4KICBjb25zdCBmaWxlcyA9IHlpZWxkIHsKICAgIHByb21pc2U6IFByb21pc2UucmFjZShbcGlja2VkUHJvbWlzZSwgY2FuY2VsUHJvbWlzZV0pLAogICAgcmVzcG9uc2U6IHsKICAgICAgYWN0aW9uOiAnc3RhcnRpbmcnLAogICAgfQogIH07CgogIGNhbmNlbC5yZW1vdmUoKTsKCiAgLy8gRGlzYWJsZSB0aGUgaW5wdXQgZWxlbWVudCBzaW5jZSBmdXJ0aGVyIHBpY2tzIGFyZSBub3QgYWxsb3dlZC4KICBpbnB1dEVsZW1lbnQuZGlzYWJsZWQgPSB0cnVlOwoKICBpZiAoIWZpbGVzKSB7CiAgICByZXR1cm4gewogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgICAgfQogICAgfTsKICB9CgogIGZvciAoY29uc3QgZmlsZSBvZiBmaWxlcykgewogICAgY29uc3QgbGkgPSBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdsaScpOwogICAgbGkuYXBwZW5kKHNwYW4oZmlsZS5uYW1lLCB7Zm9udFdlaWdodDogJ2JvbGQnfSkpOwogICAgbGkuYXBwZW5kKHNwYW4oCiAgICAgICAgYCgke2ZpbGUudHlwZSB8fCAnbi9hJ30pIC0gJHtmaWxlLnNpemV9IGJ5dGVzLCBgICsKICAgICAgICBgbGFzdCBtb2RpZmllZDogJHsKICAgICAgICAgICAgZmlsZS5sYXN0TW9kaWZpZWREYXRlID8gZmlsZS5sYXN0TW9kaWZpZWREYXRlLnRvTG9jYWxlRGF0ZVN0cmluZygpIDoKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ24vYSd9IC0gYCkpOwogICAgY29uc3QgcGVyY2VudCA9IHNwYW4oJzAlIGRvbmUnKTsKICAgIGxpLmFwcGVuZENoaWxkKHBlcmNlbnQpOwoKICAgIG91dHB1dEVsZW1lbnQuYXBwZW5kQ2hpbGQobGkpOwoKICAgIGNvbnN0IGZpbGVEYXRhUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICAgIGNvbnN0IHJlYWRlciA9IG5ldyBGaWxlUmVhZGVyKCk7CiAgICAgIHJlYWRlci5vbmxvYWQgPSAoZSkgPT4gewogICAgICAgIHJlc29sdmUoZS50YXJnZXQucmVzdWx0KTsKICAgICAgfTsKICAgICAgcmVhZGVyLnJlYWRBc0FycmF5QnVmZmVyKGZpbGUpOwogICAgfSk7CiAgICAvLyBXYWl0IGZvciB0aGUgZGF0YSB0byBiZSByZWFkeS4KICAgIGxldCBmaWxlRGF0YSA9IHlpZWxkIHsKICAgICAgcHJvbWlzZTogZmlsZURhdGFQcm9taXNlLAogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbnRpbnVlJywKICAgICAgfQogICAgfTsKCiAgICAvLyBVc2UgYSBjaHVua2VkIHNlbmRpbmcgdG8gYXZvaWQgbWVzc2FnZSBzaXplIGxpbWl0cy4gU2VlIGIvNjIxMTU2NjAuCiAgICBsZXQgcG9zaXRpb24gPSAwOwogICAgd2hpbGUgKHBvc2l0aW9uIDwgZmlsZURhdGEuYnl0ZUxlbmd0aCkgewogICAgICBjb25zdCBsZW5ndGggPSBNYXRoLm1pbihmaWxlRGF0YS5ieXRlTGVuZ3RoIC0gcG9zaXRpb24sIE1BWF9QQVlMT0FEX1NJWkUpOwogICAgICBjb25zdCBjaHVuayA9IG5ldyBVaW50OEFycmF5KGZpbGVEYXRhLCBwb3NpdGlvbiwgbGVuZ3RoKTsKICAgICAgcG9zaXRpb24gKz0gbGVuZ3RoOwoKICAgICAgY29uc3QgYmFzZTY0ID0gYnRvYShTdHJpbmcuZnJvbUNoYXJDb2RlLmFwcGx5KG51bGwsIGNodW5rKSk7CiAgICAgIHlpZWxkIHsKICAgICAgICByZXNwb25zZTogewogICAgICAgICAgYWN0aW9uOiAnYXBwZW5kJywKICAgICAgICAgIGZpbGU6IGZpbGUubmFtZSwKICAgICAgICAgIGRhdGE6IGJhc2U2NCwKICAgICAgICB9LAogICAgICB9OwogICAgICBwZXJjZW50LnRleHRDb250ZW50ID0KICAgICAgICAgIGAke01hdGgucm91bmQoKHBvc2l0aW9uIC8gZmlsZURhdGEuYnl0ZUxlbmd0aCkgKiAxMDApfSUgZG9uZWA7CiAgICB9CiAgfQoKICAvLyBBbGwgZG9uZS4KICB5aWVsZCB7CiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICB9CiAgfTsKfQoKc2NvcGUuZ29vZ2xlID0gc2NvcGUuZ29vZ2xlIHx8IHt9OwpzY29wZS5nb29nbGUuY29sYWIgPSBzY29wZS5nb29nbGUuY29sYWIgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYi5fZmlsZXMgPSB7CiAgX3VwbG9hZEZpbGVzLAogIF91cGxvYWRGaWxlc0NvbnRpbnVlLAp9Owp9KShzZWxmKTsK",
"ok": true,
"headers": [
[
"content-type",
"application/javascript"
]
],
"status": 200,
"status_text": ""
}
},
"base_uri": "https://localhost:8080/",
"height": 73
},
"outputId": "cbac8f8e-c200-4332-c8d8-799f3def14fb"
},
"source": [
"from google.colab import files\n",
"files.upload()\n",
"\n",
"!pip install -q kaggle\n",
"!mkdir ~/.kaggle\n",
"!cp kaggle.json ~/.kaggle/\n",
"!chmod 600 ~/.kaggle/kaggle.json"
],
"execution_count": 1,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <input type=\"file\" id=\"files-6d56757a-0c5d-470f-abb0-a23f34f19ce3\" name=\"files[]\" multiple disabled\n",
" style=\"border:none\" />\n",
" <output id=\"result-6d56757a-0c5d-470f-abb0-a23f34f19ce3\">\n",
" Upload widget is only available when the cell has been executed in the\n",
" current browser session. Please rerun this cell to enable.\n",
" </output>\n",
" <script src=\"/nbextensions/google.colab/files.js\"></script> "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"Saving kaggle.json to kaggle.json\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "FCXx44zZL81K",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 185
},
"outputId": "612036d2-8981-44c5-cca8-29f6a66977c5"
},
"source": [
"!kaggle competitions download -c nlp-getting-started"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"Warning: Looks like you're using an outdated API Version, please consider updating (server 1.5.6 / client 1.5.4)\n",
"Downloading test.csv to /content\n",
" 0% 0.00/411k [00:00<?, ?B/s]\n",
"100% 411k/411k [00:00<00:00, 61.7MB/s]\n",
"Downloading train.csv to /content\n",
" 0% 0.00/965k [00:00<?, ?B/s]\n",
"100% 965k/965k [00:00<00:00, 64.6MB/s]\n",
"Downloading sample_submission.csv to /content\n",
" 0% 0.00/22.2k [00:00<?, ?B/s]\n",
"100% 22.2k/22.2k [00:00<00:00, 23.1MB/s]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "nK4y3ZqQOdMw",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "6b83e346-36d1-4a60-95ff-968ca3b05f9e"
},
"source": [
"!pip install --upgrade transformers\n",
"!pip install simpletransformers"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"Collecting transformers\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/27/3c/91ed8f5c4e7ef3227b4119200fc0ed4b4fd965b1f0172021c25701087825/transformers-3.0.2-py3-none-any.whl (769kB)\n",
"\r\u001b[K |▍ | 10kB 21.8MB/s eta 0:00:01\r\u001b[K |▉ | 20kB 4.5MB/s eta 0:00:01\r\u001b[K |█▎ | 30kB 5.7MB/s eta 0:00:01\r\u001b[K |█▊ | 40kB 6.0MB/s eta 0:00:01\r\u001b[K |██▏ | 51kB 4.8MB/s eta 0:00:01\r\u001b[K |██▋ | 61kB 5.4MB/s eta 0:00:01\r\u001b[K |███ | 71kB 5.9MB/s eta 0:00:01\r\u001b[K |███▍ | 81kB 6.5MB/s eta 0:00:01\r\u001b[K |███▉ | 92kB 6.9MB/s eta 0:00:01\r\u001b[K |████▎ | 102kB 6.7MB/s eta 0:00:01\r\u001b[K |████▊ | 112kB 6.7MB/s eta 0:00:01\r\u001b[K |█████▏ | 122kB 6.7MB/s eta 0:00:01\r\u001b[K |█████▌ | 133kB 6.7MB/s eta 0:00:01\r\u001b[K |██████ | 143kB 6.7MB/s eta 0:00:01\r\u001b[K |██████▍ | 153kB 6.7MB/s eta 0:00:01\r\u001b[K |██████▉ | 163kB 6.7MB/s eta 0:00:01\r\u001b[K |███████▎ | 174kB 6.7MB/s eta 0:00:01\r\u001b[K |███████▊ | 184kB 6.7MB/s eta 0:00:01\r\u001b[K |████████ | 194kB 6.7MB/s eta 0:00:01\r\u001b[K |████████▌ | 204kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████ | 215kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████▍ | 225kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████▉ | 235kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████▎ | 245kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████▋ | 256kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████ | 266kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████▌ | 276kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████ | 286kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████▍ | 296kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████▉ | 307kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████▏ | 317kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████▋ | 327kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████ | 337kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████▌ | 348kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████ | 358kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████▍ | 368kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████▊ | 378kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████▏ | 389kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████▋ | 399kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████ | 409kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████▌ | 419kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████ | 430kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████▎ | 440kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████▊ | 450kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████▏ | 460kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████▋ | 471kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████ | 481kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████▌ | 491kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████▉ | 501kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████▎ | 512kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████▊ | 522kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████▏ | 532kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████▋ | 542kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████ | 552kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████▍ | 563kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████▉ | 573kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████▎ | 583kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████▊ | 593kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████▏ | 604kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████▋ | 614kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████ | 624kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████▍ | 634kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████▉ | 645kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████████▎ | 655kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████████▊ | 665kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████████▏ | 675kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████████▌ | 686kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████████ | 696kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▍ | 706kB 6.7MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▉ | 716kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▎ | 727kB 6.7MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▊ | 737kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████████████ | 747kB 6.7MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▌| 757kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 768kB 6.7MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 778kB 6.7MB/s \n",
"\u001b[?25hCollecting sentencepiece!=0.1.92\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)\n",
"\u001b[K |████████████████████████████████| 1.1MB 38.4MB/s \n",
"\u001b[?25hCollecting tokenizers==0.8.1.rc1\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/40/d0/30d5f8d221a0ed981a186c8eb986ce1c94e3a6e87f994eae9f4aa5250217/tokenizers-0.8.1rc1-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)\n",
"\u001b[K |████████████████████████████████| 3.0MB 36.8MB/s \n",
"\u001b[?25hRequirement already satisfied, skipping upgrade: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)\n",
"Requirement already satisfied, skipping upgrade: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5)\n",
"Requirement already satisfied, skipping upgrade: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)\n",
"Requirement already satisfied, skipping upgrade: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied, skipping upgrade: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)\n",
"Requirement already satisfied, skipping upgrade: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4)\n",
"Collecting sacremoses\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)\n",
"\u001b[K |████████████████████████████████| 890kB 41.8MB/s \n",
"\u001b[?25hRequirement already satisfied, skipping upgrade: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)\n",
"Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)\n",
"Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.6.20)\n",
"Requirement already satisfied, skipping upgrade: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)\n",
"Requirement already satisfied, skipping upgrade: six in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (1.15.0)\n",
"Requirement already satisfied, skipping upgrade: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)\n",
"Requirement already satisfied, skipping upgrade: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.16.0)\n",
"Building wheels for collected packages: sacremoses\n",
" Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893260 sha256=248a9276b72618d8e021779f36b78499151e0b8cda4d7a5857b8e05995317b2c\n",
" Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45\n",
"Successfully built sacremoses\n",
"Installing collected packages: sentencepiece, tokenizers, sacremoses, transformers\n",
"Successfully installed sacremoses-0.0.43 sentencepiece-0.1.91 tokenizers-0.8.1rc1 transformers-3.0.2\n",
"Collecting simpletransformers\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/3b/36/884727c20a4777105705cd6d01d57abfa7274d63a7aebb6d23d46b589d2d/simpletransformers-0.46.6-py3-none-any.whl (199kB)\n",
"\u001b[K |████████████████████████████████| 204kB 7.6MB/s \n",
"\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (0.22.2.post1)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (1.0.5)\n",
"Collecting seqeval\n",
" Downloading https://files.pythonhosted.org/packages/34/91/068aca8d60ce56dd9ba4506850e876aba5e66a6f2f29aa223224b50df0de/seqeval-0.0.12.tar.gz\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (1.18.5)\n",
"Collecting tensorboardx\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/af/0c/4f41bcd45db376e6fe5c619c01100e9b7531c55791b7244815bac6eac32c/tensorboardX-2.1-py2.py3-none-any.whl (308kB)\n",
"\u001b[K |████████████████████████████████| 317kB 14.7MB/s \n",
"\u001b[?25hCollecting wandb\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/94/19/f8db9eff4b0173adf6dd2e8b0c3d8de0bfe10ec9ed63d247665980d82258/wandb-0.9.4-py2.py3-none-any.whl (1.4MB)\n",
"\u001b[K |████████████████████████████████| 1.4MB 18.5MB/s \n",
"\u001b[?25hRequirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (1.4.1)\n",
"Collecting tqdm>=4.47.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/28/7e/281edb5bc3274dfb894d90f4dbacfceaca381c2435ec6187a2c6f329aed7/tqdm-4.48.2-py2.py3-none-any.whl (68kB)\n",
"\u001b[K |████████████████████████████████| 71kB 6.4MB/s \n",
"\u001b[?25hRequirement already satisfied: tokenizers in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (0.8.1rc1)\n",
"Requirement already satisfied: regex in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (2019.12.20)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (2.23.0)\n",
"Requirement already satisfied: transformers>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from simpletransformers) (3.0.2)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->simpletransformers) (0.16.0)\n",
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->simpletransformers) (2018.9)\n",
"Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas->simpletransformers) (2.8.1)\n",
"Requirement already satisfied: Keras>=2.2.4 in /usr/local/lib/python3.6/dist-packages (from seqeval->simpletransformers) (2.4.3)\n",
"Requirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.6/dist-packages (from tensorboardx->simpletransformers) (3.12.4)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from tensorboardx->simpletransformers) (1.15.0)\n",
"Collecting docker-pycreds>=0.4.0\n",
" Downloading https://files.pythonhosted.org/packages/f5/e8/f6bd1eee09314e7e6dee49cbe2c5e22314ccdb38db16c9fc72d2fa80d054/docker_pycreds-0.4.0-py2.py3-none-any.whl\n",
"Collecting GitPython>=1.0.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/f9/1e/a45320cab182bf1c8656107b3d4c042e659742822fc6bff150d769a984dd/GitPython-3.1.7-py3-none-any.whl (158kB)\n",
"\u001b[K |████████████████████████████████| 163kB 41.4MB/s \n",
"\u001b[?25hCollecting watchdog>=0.8.3\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/0e/06/121302598a4fc01aca942d937f4a2c33430b7181137b35758913a8db10ad/watchdog-0.10.3.tar.gz (94kB)\n",
"\u001b[K |████████████████████████████████| 102kB 10.1MB/s \n",
"\u001b[?25hRequirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.6/dist-packages (from wandb->simpletransformers) (5.4.8)\n",
"Collecting sentry-sdk>=0.4.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/4b/23/811fcdfc9d67fea7e47c91dd553081218d53dda744c28384f4d2f69206c9/sentry_sdk-0.16.3-py2.py3-none-any.whl (110kB)\n",
"\u001b[K |████████████████████████████████| 112kB 37.7MB/s \n",
"\u001b[?25hRequirement already satisfied: PyYAML>=3.10 in /usr/local/lib/python3.6/dist-packages (from wandb->simpletransformers) (3.13)\n",
"Requirement already satisfied: Click>=7.0 in /usr/local/lib/python3.6/dist-packages (from wandb->simpletransformers) (7.1.2)\n",
"Collecting gql==0.2.0\n",
" Downloading https://files.pythonhosted.org/packages/c4/6f/cf9a3056045518f06184e804bae89390eb706168349daa9dff8ac609962a/gql-0.2.0.tar.gz\n",
"Collecting configparser>=3.8.1\n",
" Downloading https://files.pythonhosted.org/packages/4b/6b/01baa293090240cf0562cc5eccb69c6f5006282127f2b846fad011305c79/configparser-5.0.0-py3-none-any.whl\n",
"Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.6/dist-packages (from wandb->simpletransformers) (7.352.0)\n",
"Collecting subprocess32>=3.5.3\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/32/c8/564be4d12629b912ea431f1a50eb8b3b9d00f1a0b1ceff17f266be190007/subprocess32-3.5.4.tar.gz (97kB)\n",
"\u001b[K |████████████████████████████████| 102kB 10.5MB/s \n",
"\u001b[?25hCollecting shortuuid>=0.5.0\n",
" Downloading https://files.pythonhosted.org/packages/25/a6/2ecc1daa6a304e7f1b216f0896b26156b78e7c38e1211e9b798b4716c53d/shortuuid-1.0.1-py3-none-any.whl\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (3.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (2020.6.20)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (2.10)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->simpletransformers) (1.24.3)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (3.0.12)\n",
"Requirement already satisfied: sentencepiece!=0.1.92 in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (0.1.91)\n",
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (0.0.43)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (20.4)\n",
"Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers>=3.0.2->simpletransformers) (0.7)\n",
"Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->simpletransformers) (2.10.0)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.8.0->tensorboardx->simpletransformers) (49.2.0)\n",
"Collecting gitdb<5,>=4.0.1\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/48/11/d1800bca0a3bae820b84b7d813ad1eff15a48a64caea9c823fc8c1b119e8/gitdb-4.0.5-py3-none-any.whl (63kB)\n",
"\u001b[K |████████████████████████████████| 71kB 8.5MB/s \n",
"\u001b[?25hCollecting pathtools>=0.1.1\n",
" Downloading https://files.pythonhosted.org/packages/e7/7f/470d6fcdf23f9f3518f6b0b76be9df16dcc8630ad409947f8be2eb0ed13a/pathtools-0.1.2.tar.gz\n",
"Collecting graphql-core<2,>=0.5.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/b0/89/00ad5e07524d8c523b14d70c685e0299a8b0de6d0727e368c41b89b7ed0b/graphql-core-1.1.tar.gz (70kB)\n",
"\u001b[K |████████████████████████████████| 71kB 7.5MB/s \n",
"\u001b[?25hRequirement already satisfied: promise<3,>=2.0 in /usr/local/lib/python3.6/dist-packages (from gql==0.2.0->wandb->simpletransformers) (2.3)\n",
"Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers>=3.0.2->simpletransformers) (2.4.7)\n",
"Collecting smmap<4,>=3.0.1\n",
" Downloading https://files.pythonhosted.org/packages/b0/9a/4d409a6234eb940e6a78dfdfc66156e7522262f5f2fecca07dc55915952d/smmap-3.0.4-py2.py3-none-any.whl\n",
"Building wheels for collected packages: seqeval, watchdog, gql, subprocess32, pathtools, graphql-core\n",
" Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for seqeval: filename=seqeval-0.0.12-cp36-none-any.whl size=7424 sha256=2474d10796e1cf6a39b0ceb57b0a74e039821f280a8f929724edcb71e1f64232\n",
" Stored in directory: /root/.cache/pip/wheels/4f/32/0a/df3b340a82583566975377d65e724895b3fad101a3fb729f68\n",
" Building wheel for watchdog (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for watchdog: filename=watchdog-0.10.3-cp36-none-any.whl size=73870 sha256=d3c9ad43d054be0e4b17eeb0395e98cc751f642a2b22ba85422f9c6711d3b1c7\n",
" Stored in directory: /root/.cache/pip/wheels/a8/1d/38/2c19bb311f67cc7b4d07a2ec5ea36ab1a0a0ea50db994a5bc7\n",
" Building wheel for gql (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for gql: filename=gql-0.2.0-cp36-none-any.whl size=7630 sha256=1ce7763303309ef6f96807f63b917a94d6a610bbc9cc456898f457e2e421af4e\n",
" Stored in directory: /root/.cache/pip/wheels/ce/0e/7b/58a8a5268655b3ad74feef5aa97946f0addafb3cbb6bd2da23\n",
" Building wheel for subprocess32 (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for subprocess32: filename=subprocess32-3.5.4-cp36-none-any.whl size=6489 sha256=934e338286a1eb4ca3a184c289fceb880c637eaa4e195efa14a4965b179e706a\n",
" Stored in directory: /root/.cache/pip/wheels/68/39/1a/5e402bdfdf004af1786c8b853fd92f8c4a04f22aad179654d1\n",
" Building wheel for pathtools (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for pathtools: filename=pathtools-0.1.2-cp36-none-any.whl size=8784 sha256=47a4319d7df7efb82a6ce887a7b938f558bbf7159e7bf41788c023a64b0616a5\n",
" Stored in directory: /root/.cache/pip/wheels/0b/04/79/c3b0c3a0266a3cb4376da31e5bfe8bba0c489246968a68e843\n",
" Building wheel for graphql-core (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for graphql-core: filename=graphql_core-1.1-cp36-none-any.whl size=104650 sha256=337dfd6e587a3f9622a4bf6611e6781265f9f8bd6ec84a6d042505cc2683a9e8\n",
" Stored in directory: /root/.cache/pip/wheels/45/99/d7/c424029bb0fe910c63b68dbf2aa20d3283d023042521bcd7d5\n",
"Successfully built seqeval watchdog gql subprocess32 pathtools graphql-core\n",
"Installing collected packages: seqeval, tensorboardx, docker-pycreds, smmap, gitdb, GitPython, pathtools, watchdog, sentry-sdk, graphql-core, gql, configparser, subprocess32, shortuuid, wandb, tqdm, simpletransformers\n",
" Found existing installation: tqdm 4.41.1\n",
" Uninstalling tqdm-4.41.1:\n",
" Successfully uninstalled tqdm-4.41.1\n",
"Successfully installed GitPython-3.1.7 configparser-5.0.0 docker-pycreds-0.4.0 gitdb-4.0.5 gql-0.2.0 graphql-core-1.1 pathtools-0.1.2 sentry-sdk-0.16.3 seqeval-0.0.12 shortuuid-1.0.1 simpletransformers-0.46.6 smmap-3.0.4 subprocess32-3.5.4 tensorboardx-2.1 tqdm-4.48.2 wandb-0.9.4 watchdog-0.10.3\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vuoAC5cuv766",
"colab_type": "text"
},
"source": [
"## Pre-processing"
]
},
{
"cell_type": "code",
"metadata": {
"id": "qzEgLg_uN1im",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 87
},
"outputId": "deedd88f-5dcd-43b4-ecdf-b4713fe31fb2"
},
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"from sklearn.metrics import f1_score\n",
"from sklearn.model_selection import train_test_split\n",
"from simpletransformers.classification import ClassificationModel, ClassificationArgs"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
" import pandas.util.testing as tm\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m W&B installed but not logged in. Run `wandb login` or set the WANDB_API_KEY env variable.\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "3nyCXC4mOMYK",
"colab_type": "code",
"colab": {}
},
"source": [
"train = pd.read_csv('/content/train.csv')\n",
"test = pd.read_csv('/content/test.csv')\n",
"sample_sub = pd.read_csv('/content/sample_submission.csv')"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5Ve200-QOb-n",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 195
},
"outputId": "cea71ae5-cd65-4a02-bcca-e628474bdcef"
},
"source": [
"train.head()"
],
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>keyword</th>\n",
" <th>location</th>\n",
" <th>text</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Our Deeds are the Reason of this #earthquake M...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Forest fire near La Ronge Sask. Canada</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>All residents asked to 'shelter in place' are ...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>13,000 people receive #wildfires evacuation or...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Just got sent this photo from Ruby #Alaska as ...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id keyword ... text target\n",
"0 1 NaN ... Our Deeds are the Reason of this #earthquake M... 1\n",
"1 4 NaN ... Forest fire near La Ronge Sask. Canada 1\n",
"2 5 NaN ... All residents asked to 'shelter in place' are ... 1\n",
"3 6 NaN ... 13,000 people receive #wildfires evacuation or... 1\n",
"4 7 NaN ... Just got sent this photo from Ruby #Alaska as ... 1\n",
"\n",
"[5 rows x 5 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "fvWVghC01pdB",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 195
},
"outputId": "2a1fc352-943f-47bb-d50d-100ecb0745c3"
},
"source": [
"sample_sub.head()"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id target\n",
"0 0 0\n",
"1 2 0\n",
"2 3 0\n",
"3 9 0\n",
"4 11 0"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "OpRGL9uwEo5p",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 50
},
"outputId": "ab9a3175-f799-4a84-a15e-15c3724a24b8"
},
"source": [
"print('Shape of train set {}'.format(train.shape))\n",
"print('Shape of test set {}'.format(test.shape))"
],
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"text": [
"Shape of train set (7613, 5)\n",
"Shape of test set (3263, 4)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "7RSBpvP2ssKQ",
"colab_type": "code",
"colab": {}
},
"source": [
"train.drop(['id', 'keyword', 'location'], axis=1, inplace=True)\n",
"test.drop(['id', 'keyword', 'location'], axis=1, inplace=True)"
],
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "aCCjVicXVbXb",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "6400c98f-6154-416c-aa99-6e690f0741f9"
},
"source": [
"train.isnull().sum().sum()\n",
"test.isnull().sum().sum()"
],
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0"
]
},
"metadata": {
"tags": []
},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "f0OI9Pm3ZE_u",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 67
},
"outputId": "d0e8701f-f473-48cd-ffb1-27ecc8dbc0ad"
},
"source": [
"train['target'].value_counts()"
],
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 4342\n",
"1 3271\n",
"Name: target, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "VMSFXUZZYgwz",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 279
},
"outputId": "e89b4b6c-29b3-4b96-990d-937f5d1f7963"
},
"source": [
"sns.countplot(train['target']);"
],
"execution_count": 12,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAPJklEQVR4nO3de+zddX3H8eeLFmTGS9H+xrRllmizpW6K2gHTZNkgg8rUEhWD0dG5Zt0ytmiyuOGyjImyaObGvEyTZlQLWUTUbSBxMQ3izIxcWlEuZYSfF0YbtJVy8RLYiu/9cT7VH6W/fg6l51J+z0dy0u/38/1+z+/zSwrPnvP9nu9JVSFJ0sEcNekJSJKmn7GQJHUZC0lSl7GQJHUZC0lS1+JJT2AUli5dWitWrJj0NCTpiLJt27bvV9XMgbY9JWOxYsUKtm7dOulpSNIRJcnd823zbShJUpexkCR1GQtJUpexkCR1GQtJUpexkCR1GQtJUpexkCR1GQtJUtdT8hPch8Mr3nnZpKegKbTt786b9BSkifCVhSSpy1hIkrqMhSSpy1hIkrqMhSSpy1hIkrqMhSSpy1hIkrqMhSSpy1hIkrqMhSSpy1hIkrpGHoski5LcnOSatn5ikhuSzCb5VJJj2vjT2vps275iznO8q43fmeTMUc9ZkvRY43hl8Xbgjjnr7wcuqaoXAfcD69v4euD+Nn5J248kq4BzgRcDa4CPJlk0hnlLkpqRxiLJcuB3gH9u6wFOAz7TdtkMnN2W17Z12vbT2/5rgSuq6pGq+jYwC5w8ynlLkh5r1K8s/hH4c+Anbf25wANVtbet7wCWteVlwD0AbfuDbf+fjh/gmJ9KsiHJ1iRbd+/efbh/D0la0EYWiySvAXZV1bZR/Yy5qmpjVa2uqtUzMzPj+JGStGCM8pvyXgW8LslZwLHAs4APAkuSLG6vHpYDO9v+O4ETgB1JFgPPBu6bM77P3GMkSWMwslcWVfWuqlpeVSsYnKD+YlW9BbgOeGPbbR1wVVu+uq3Ttn+xqqqNn9uuljoRWAncOKp5S5IebxLfwf0XwBVJ3gvcDFzaxi8FLk8yC+xhEBiq6vYkVwLbgb3A+VX16PinLUkL11hiUVVfAr7Ulr/FAa5mqqqHgXPmOf5i4OLRzVCSdDB+gluS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1LV40hOQ9MT8z0W/OukpaAr94l/fOtLn95WFJKnLWEiSuoyFJKnLWEiSuoyFJKnLWEiSuoyFJKnLWEiSuoyFJKlrZLFIcmySG5N8I8ntSd7dxk9MckOS2SSfSnJMG39aW59t21fMea53tfE7k5w5qjlLkg5slK8sHgFOq6qXAicBa5KcCrwfuKSqXgTcD6xv+68H7m/jl7T9SLIKOBd4MbAG+GiSRSOctyRpPyOLRQ38sK0e3R4FnAZ8po1vBs5uy2vbOm376UnSxq+oqkeq6tvALHDyqOYtSXq8kZ6zSLIoydeBXcAW4JvAA1W1t+2yA1jWlpcB9wC07Q8Cz507foBj5v6sDUm2Jtm6e/fuUfw6krRgjTQWVfVoVZ0ELGfwauCXR/izNlbV6qpaPTMzM6ofI0kL0liuhqqqB4DrgF8HliTZd2v05cDOtrwTOAGgbX82cN/c8QMcI0kag1FeDTWTZElb/jngt4E7GETjjW23dcBVbfnqtk7b/sWqqjZ+brta6kRgJXDjqOYtSXq8UX750fOAze3KpaOAK6vqmiTbgSuSvBe4Gbi07X8pcHmSWWAPgyugqKrbk1wJbAf2AudX1aMjnLckaT8ji0VV3QK87ADj3+IAVzNV1cPAOfM818XAxYd7jpKk4fgJbklSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lSl7GQJHUZC0lS11CxSHLtMGOSpKemg36tapJjgacDS5McB6RtehawbMRzkyRNid53cP8h8A7g+cA2fhaLh4CPjHBekqQpctBYVNUHgQ8m+dOq+vCY5iRJmjK9VxYAVNWHk7wSWDH3mKq6bETzkiRNkaFikeRy4IXA14FH23ABxkKSFoChYgGsBlZVVY1yMpKk6TTs5yxuA35hlBORJE2vYV9ZLAW2J7kReGTfYFW9biSzkiRNlWFj8TejnIQkaboNezXUf456IpKk6TXs1VA/YHD1E8AxwNHAj6rqWaOamCRpegz7yuKZ+5aTBFgLnDqqSUmSpssTvutsDfw7cOYI5iNJmkLDvg31+jmrRzH43MXDI5mRJGnqDHs11GvnLO8FvsPgrShJ0gIw7DmLt416IpKk6TXslx8tT/JvSXa1x2eTLB/15CRJ02HYE9wfB65m8L0Wzwc+18YkSQvAsLGYqaqPV9Xe9vgEMDPCeUmSpsiwsbgvyVuTLGqPtwL3jXJikqTpMWwsfh94E/Bd4F7gjcDvHeyAJCckuS7J9iS3J3l7G39Oki1J7mp/HtfGk+RDSWaT3JLk5XOea13b/64k6w7h95QkPQnDxuIiYF1VzVTVzzOIx7s7x+wF/qyqVjH4tPf5SVYBFwDXVtVK4Nq2DvBqYGV7bAA+BoO4ABcCpwAnAxfuC4wkaTyGjcVLqur+fStVtQd42cEOqKp7q+prbfkHwB3AMgafz9jcdtsMnN2W1wKXtU+IXw8sSfI8Bp8U31JVe9octgBrhpy3JOkwGDYWR83913z71/6wH+gjyQoGcbkBOL6q7m2bvgsc35aXAffMOWxHG5tvfP+fsSHJ1iRbd+/ePezUJElDGPZ/+H8PfDXJp9v6OcDFwxyY5BnAZ4F3VNVDg/sQDlRVJTksX9VaVRuBjQCrV6/2618l6TAa6pVFVV0GvB74Xnu8vqou7x2X5GgGofiXqvrXNvy99vYS7c9dbXwncMKcw5e3sfnGJUljMvRdZ6tqe1V9pD229/ZvtzK/FLijqv5hzqargX1XNK0Drpozfl67KupU4MH2dtUXgDOSHNfeCjujjUmSxmTo8w6H4FXA7wK3Jvl6G/tL4H3AlUnWA3czuCQX4PPAWcAs8GPgbTA4mZ7kPcBNbb+L2gl2SdKYjCwWVfVfQObZfPoB9i/g/HmeaxOw6fDNTpL0RDzhLz+SJC08xkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldI4tFkk1JdiW5bc7Yc5JsSXJX+/O4Np4kH0oym+SWJC+fc8y6tv9dSdaNar6SpPmN8pXFJ4A1+41dAFxbVSuBa9s6wKuBle2xAfgYDOICXAicApwMXLgvMJKk8RlZLKrqy8Ce/YbXApvb8mbg7Dnjl9XA9cCSJM8DzgS2VNWeqrof2MLjAyRJGrFxn7M4vqrubcvfBY5vy8uAe+bst6ONzTf+OEk2JNmaZOvu3bsP76wlaYGb2AnuqiqgDuPzbayq1VW1emZm5nA9rSSJ8cfie+3tJdqfu9r4TuCEOfstb2PzjUuSxmjcsbga2HdF0zrgqjnj57Wrok4FHmxvV30BOCPJce3E9hltTJI0RotH9cRJPgn8JrA0yQ4GVzW9D7gyyXrgbuBNbffPA2cBs8CPgbcBVNWeJO8Bbmr7XVRV+580lySN2MhiUVVvnmfT6QfYt4Dz53meTcCmwzg1SdIT5Ce4JUldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldxkKS1GUsJEldR0wskqxJcmeS2SQXTHo+krSQHBGxSLII+Cfg1cAq4M1JVk12VpK0cBwRsQBOBmar6ltV9b/AFcDaCc9JkhaMxZOewJCWAffMWd8BnDJ3hyQbgA1t9YdJ7hzT3BaCpcD3Jz2JaZAPrJv0FPRY/t3c58Icjmd5wXwbjpRYdFXVRmDjpOfxVJRka1WtnvQ8pP35d3N8jpS3oXYCJ8xZX97GJEljcKTE4iZgZZITkxwDnAtcPeE5SdKCcUS8DVVVe5P8CfAFYBGwqapun/C0FhLf3tO08u/mmKSqJj0HSdKUO1LehpIkTZCxkCR1GQsdlLdZ0TRKsinJriS3TXouC4Wx0Ly8zYqm2CeANZOexEJiLHQw3mZFU6mqvgzsmfQ8FhJjoYM50G1Wlk1oLpImyFhIkrqMhQ7G26xIAoyFDs7brEgCjIUOoqr2Avtus3IHcKW3WdE0SPJJ4KvALyXZkWT9pOf0VOftPiRJXb6ykCR1GQtJUpexkCR1GQtJUpexkCR1GQvpECRZkuSPx/BzzvbmjZoGxkI6NEuAoWORgUP57+1sBnf8lSbKz1lIhyDJvjvw3glcB7wEOA44GvirqroqyQoGH2i8AXgFcBZwHvBWYDeDmzRuq6oPJHkhg9vBzwA/Bv4AeA5wDfBge7yhqr45pl9ReozFk56AdIS6APiVqjopyWLg6VX1UJKlwPVJ9t0WZSWwrqquT/JrwBuAlzKIyteAbW2/jcAfVdVdSU4BPlpVp7XnuaaqPjPOX07an7GQnrwAf5vkN4CfMLiN+/Ft291VdX1bfhVwVVU9DDyc5HMASZ4BvBL4dJJ9z/m0cU1eGoaxkJ68tzB4++gVVfV/Sb4DHNu2/WiI448CHqiqk0Y0P+lJ8wS3dGh+ADyzLT8b2NVC8VvAC+Y55ivAa5Mc215NvAagqh4Cvp3kHPjpyfCXHuDnSBNjLKRDUFX3AV9JchtwErA6ya0MTmD/9zzH3MTgFu+3AP8B3MrgxDUMXp2sT/IN4HZ+9vW1VwDvTHJzOwkuTYRXQ0ljlOQZVfXDJE8HvgxsqKqvTXpeUo/nLKTx2tg+ZHcssNlQ6EjhKwtJUpfnLCRJXcZCktRlLCRJXcZCktRlLCRJXf8PSJ+98yzhfZMAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "lTb5QrxTqSJo",
"colab_type": "code",
"colab": {}
},
"source": [
"train.columns = ['text', 'labels']"
],
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "qEK9n5wcqeC6",
"colab_type": "code",
"colab": {}
},
"source": [
"train_df, valid_df = train_test_split(train, test_size=0.2, stratify=train['labels'], random_state=42)"
],
"execution_count": 15,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "rKT3QDBVwDWg",
"colab_type": "text"
},
"source": [
"## Initialize a ClassificationModel"
]
},
{
"cell_type": "code",
"metadata": {
"id": "DAOGxHsZYsaX",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 316,
"referenced_widgets": [
"b455d75f7a104923b1bf6418de35860b",
"7c3428e7e9644b72a6313330621bb1af",
"d12d68cf6a7f4c97a55f9c983b7bd574",
"3d58054c927a4c3cb52fd97080bbab12",
"0986f24d906749e69106d5f2283567ee",
"88f72c3b962e45f3a3024c2d0b90f56b",
"f874e0e8067d4762bf28fabe2d556d0f",
"03dba9bb65c24669b7972e5225132dc4",
"edeb7d88a6b34ccc9fab83ec1e7b485a",
"d63eb26f60e9444a8af68b104bf16162",
"39bc166e56604240bc8606854e82d5d7",
"54c0d85ba4db49e79f4119f69db3986e",
"e4e6bf931212411b9f377e6b745b6684",
"e13361f355884900ba31a96a5275952e",
"de23c801a15d4b9e9387920168832916",
"c529d3191c74443e8bafea48ac6c0578",
"a189510e95d54ab8b731e4dbe4c251bb",
"ac22d5f851d946719bf131c05b28281b",
"d9ad0eea6c68467b952925e9eda85446",
"f4805482ed8f4fa0b091473b92029a76",
"efadb3c4c30c4ca2a8fcf407cf5c9058",
"5e858916b1cf4ca7a341110170f02b2b",
"a5dafe4ae9b94ca19e7acba889cf6ef7",
"a7f38f11bcb749f09e0be41f02bb7298",
"d83a56f2745e427c906227391c2c41f0",
"c73d993bafca430cb37a9e799e9934aa",
"f69264e66fd14f71ab1ed0885b502f8c",
"3e6fd1d18b284bbfb7b89aefa2325e66",
"791efcd9e33841238e644e10929f45a9",
"e2ca11d7be5a4a0bbc21fe36c760ccfc",
"5967caa8f12245b9b82ad2b4b7c82748",
"5b025b236224462ebc72d5dbe29999a3"
]
},
"outputId": "c5063809-7fa3-4353-a63e-08de055dcdf9"
},
"source": [
"model_args = ClassificationArgs(num_train_epochs=1, overwrite_output_dir=True, manual_seed=42, silent=True)\n",
"\n",
"model = ClassificationModel(model_type='roberta', model_name='roberta-base', use_cuda=True, num_labels=2, \n",
" args=model_args)"
],
"execution_count": 16,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b455d75f7a104923b1bf6418de35860b",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=481.0, style=ProgressStyle(description_…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "edeb7d88a6b34ccc9fab83ec1e7b485a",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=501200538.0, style=ProgressStyle(descri…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']\n",
"- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
],
"name": "stderr"
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a189510e95d54ab8b731e4dbe4c251bb",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d83a56f2745e427c906227391c2c41f0",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "anfYszv6wH7c",
"colab_type": "text"
},
"source": [
"## Train the model"
]
},
{
"cell_type": "code",
"metadata": {
"id": "AtMxAR-dbwpD",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 104
},
"outputId": "74faa44e-e662-4fdf-fd08-ca8a45ef0bcb"
},
"source": [
"model.train_model(train_df)"
],
"execution_count": 17,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/torch/optim/lr_scheduler.py:231: UserWarning: To get the last learning rate computed by the scheduler, please use `get_last_lr()`.\n",
" warnings.warn(\"To get the last learning rate computed by the scheduler, \"\n",
"/usr/local/lib/python3.6/dist-packages/torch/optim/lr_scheduler.py:200: UserWarning: Please also save or load the state of the optimzer when saving or loading the scheduler.\n",
" warnings.warn(SAVE_STATE_WARNING, UserWarning)\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XriLtv5ewMe0",
"colab_type": "text"
},
"source": [
"## Evaluate the model"
]
},
{
"cell_type": "code",
"metadata": {
"id": "1pP36pI0qGl0",
"colab_type": "code",
"colab": {}
},
"source": [
"result, model_outputs, wrong_predictions = model.eval_model(valid_df)"
],
"execution_count": 18,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "XmMA1clU_ZYH",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 118
},
"outputId": "093d9136-61fb-4373-ae83-7b80585a0912"
},
"source": [
"result"
],
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'eval_loss': 0.41687825866316625,\n",
" 'fn': 120,\n",
" 'fp': 125,\n",
" 'mcc': 0.6720497332586948,\n",
" 'tn': 744,\n",
" 'tp': 534}"
]
},
"metadata": {
"tags": []
},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "OcEGM23OBk7F",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "7f0c6edc-6702-470f-db02-8f269f66528c"
},
"source": [
"predictions = []\n",
"for x in model_outputs:\n",
" predictions.append(np.argmax(x))\n",
"\n",
"print('f1 score:', f1_score(valid_df['labels'], predictions))"
],
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"text": [
"f1 score: 0.8134044173648134\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "M0Kblsp1wqO1",
"colab_type": "text"
},
"source": [
"## Prediction on test set"
]
},
{
"cell_type": "code",
"metadata": {
"id": "CXhonaYc0KbT",
"colab_type": "code",
"colab": {}
},
"source": [
"test_predictions, raw_outputs = model.predict(test['text'])"
],
"execution_count": 21,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "8qCvpqRO1cl_",
"colab_type": "code",
"colab": {}
},
"source": [
"sample_sub['target'] = test_predictions"
],
"execution_count": 22,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "gGCcLBPT0YHm",
"colab_type": "code",
"colab": {}
},
"source": [
"sample_sub.to_csv('submission.csv', index=False)"
],
"execution_count": 23,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Bs17ReWN100v",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 17
},
"outputId": "49c50956-495f-4262-b8d3-efd1b25e43c6"
},
"source": [
"files.download('submission.csv')"
],
"execution_count": 24,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/javascript": [
"\n",
" async function download(id, filename, size) {\n",
" if (!google.colab.kernel.accessAllowed) {\n",
" return;\n",
" }\n",
" const div = document.createElement('div');\n",
" const label = document.createElement('label');\n",
" label.textContent = `Downloading \"${filename}\": `;\n",
" div.appendChild(label);\n",
" const progress = document.createElement('progress');\n",
" progress.max = size;\n",
" div.appendChild(progress);\n",
" document.body.appendChild(div);\n",
"\n",
" const buffers = [];\n",
" let downloaded = 0;\n",
"\n",
" const channel = await google.colab.kernel.comms.open(id);\n",
" // Send a message to notify the kernel that we're ready.\n",
" channel.send({})\n",
"\n",
" for await (const message of channel.messages) {\n",
" // Send a message to notify the kernel that we're ready.\n",
" channel.send({})\n",
" if (message.buffers) {\n",
" for (const buffer of message.buffers) {\n",
" buffers.push(buffer);\n",
" downloaded += buffer.byteLength;\n",
" progress.value = downloaded;\n",
" }\n",
" }\n",
" }\n",
" const blob = new Blob(buffers, {type: 'application/binary'});\n",
" const a = document.createElement('a');\n",
" a.href = window.URL.createObjectURL(blob);\n",
" a.download = filename;\n",
" div.appendChild(a);\n",
" a.click();\n",
" div.remove();\n",
" }\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "display_data",
"data": {
"application/javascript": [
"download(\"download_a10074c6-5116-472c-a285-7b39b3c55c43\", \"submission.csv\", 22746)"
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {
"tags": []
}
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment