sujnesh/58d3710b-3636-40f2-a05f-a6a4c5811929.ipynb

## 58d3710b-3636-40f2-a05f-a6a4c5811929.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Baseline_Tiring-Text.ipynb",
      "provenance": [],
      "collapsed_sections": [
        "hMUMGoZZ2wCd"
      ],
      "toc_visible": true,
      "machine_shape": "hm"
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.2"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "56ec863219f54a738e4751f2ff2fc224": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_23e6231b5d2842c786c94edc1bec5e2d",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_d60eefd729d54d7bb6573608a5239777",
              "IPY_MODEL_3b7a17acfdcb486e97bf495967ca9e96"
            ]
          }
        },
        "23e6231b5d2842c786c94edc1bec5e2d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "d60eefd729d54d7bb6573608a5239777": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_82f16f77ef35467eaa9b24b8c4119f3d",
            "_dom_classes": [],
            "description": "sample_submission.csv: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 136432,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 136432,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_99c81ddf1e7b40e2a35303b07a924352"
          }
        },
        "3b7a17acfdcb486e97bf495967ca9e96": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_bb4b8b3018404fbba6105e76c0fa71e1",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 136k/136k [00:01&lt;00:00, 79.3kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_1b088d4d92e947649ddedbae733e2cb2"
          }
        },
        "82f16f77ef35467eaa9b24b8c4119f3d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "99c81ddf1e7b40e2a35303b07a924352": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "bb4b8b3018404fbba6105e76c0fa71e1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "1b088d4d92e947649ddedbae733e2cb2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "4a5e2ff9b2f446568095638fedfb74dd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_f61f398a8a7a4c669a7bf7dbd98602ae",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_8c45723f66744534b8de3a82870a4e31",
              "IPY_MODEL_a51dc8009441496194bf27e29aad6b71"
            ]
          }
        },
        "f61f398a8a7a4c669a7bf7dbd98602ae": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "8c45723f66744534b8de3a82870a4e31": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_2f247d747bd6452aa25939d29b77d6a2",
            "_dom_classes": [],
            "description": "train.csv: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 40848154,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 40848154,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_c4db15c331d345fc90be8e85ce8e6d38"
          }
        },
        "a51dc8009441496194bf27e29aad6b71": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_88903aefb9ea4788968268456fecd4fe",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 40.8M/40.8M [00:01&lt;00:00, 25.5MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_fbea753198f04e879a33ec1fcc93cdff"
          }
        },
        "2f247d747bd6452aa25939d29b77d6a2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "c4db15c331d345fc90be8e85ce8e6d38": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "88903aefb9ea4788968268456fecd4fe": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "fbea753198f04e879a33ec1fcc93cdff": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "79e7f1397f37494fa860418164a95e8c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_13b16297c93c4f968c796ed772903852",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_733c27c51c9840c6927136e087b79932",
              "IPY_MODEL_36489520d13946508d422c743ebc36f6"
            ]
          }
        },
        "13b16297c93c4f968c796ed772903852": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "733c27c51c9840c6927136e087b79932": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_470113171b81410794f5f5b484f01575",
            "_dom_classes": [],
            "description": "test.csv: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 10099240,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 10099240,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_25cb24a0f9c546538cc9bd8693718746"
          }
        },
        "36489520d13946508d422c743ebc36f6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_b6bb3dd94e2d47d3ada869f71b3d6c68",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 10.1M/10.1M [00:01&lt;00:00, 6.36MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_6272412804714fb38c8273654f8734f7"
          }
        },
        "470113171b81410794f5f5b484f01575": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "25cb24a0f9c546538cc9bd8693718746": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "b6bb3dd94e2d47d3ada869f71b3d6c68": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "6272412804714fb38c8273654f8734f7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "90d3e52efacd45c9827ce735f1cd0e85": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_84ba412f0ea04a4aaded73fc67772907",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_f5dbbf7d9eb54e01ba5bd93797f8c9a4",
              "IPY_MODEL_bb2208c383f34a15b7ea2082255fd30e"
            ]
          }
        },
        "84ba412f0ea04a4aaded73fc67772907": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f5dbbf7d9eb54e01ba5bd93797f8c9a4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_ada95e53ecdb44fc811a51684aa841af",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 442,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 442,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_8a9b75d84ac44178bf152003ed9d0ebb"
          }
        },
        "bb2208c383f34a15b7ea2082255fd30e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_55970aa3a0b34a5aba102debe970c2a0",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 442/442 [00:00&lt;00:00, 2.38kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_9139c047bc3241fb9f415388c3fe4d87"
          }
        },
        "ada95e53ecdb44fc811a51684aa841af": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "8a9b75d84ac44178bf152003ed9d0ebb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "55970aa3a0b34a5aba102debe970c2a0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "9139c047bc3241fb9f415388c3fe4d87": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ZtEKOsBu2wCX"
      },
      "source": [
        "# Getting Started Code for Tiring Text Challenge\n",
        "#### Authors : \n",
        "👾[Shrey Gupta](https://www.linkedin.com/in/shrey-gupta-18273518a/)\n",
        "\n",
        "🚀[Gurkirat Singh](https://www.linkedin.com/in/gsc2001/)        \n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "iDNH3tML2wCZ"
      },
      "source": [
        "## Download Necessary Packages"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "iyjnts_i2wCZ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "14bb3ce5-d452-4708-ff9f-b3c72dd6c2d8"
      },
      "source": [
        "import sys\n",
        "!pip install numpy\n",
        "!pip install pandas\n",
        "!pip install scikit-learn\n",
        "\n",
        "!pip install git+https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git >/dev/null\n",
        "%load_ext aicrowd.magic"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (1.19.5)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (1.1.5)\n",
            "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas) (2018.9)\n",
            "Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.7/dist-packages (from pandas) (1.19.5)\n",
            "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas) (2.8.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
            "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (0.23.2)\n",
            "Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.19.5)\n",
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (2.1.0)\n",
            "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.0.1)\n",
            "Requirement already satisfied: scipy>=0.19.1 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.4.1)\n",
            "  Running command git clone -q https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git /tmp/pip-req-build-7vpbl_ag\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hMUMGoZZ2wCd"
      },
      "source": [
        "## Download data\n",
        "The first step is to download our training and testing datasets. We will train a classifier on the training data, make predictions on the testing data, and submit those predictions.\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KM91aXAw2wCe",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "2bdacce8-eb48-428f-9005-8acd7940a761"
      },
      "source": [
        "from getpass import getpass\n",
        "\n",
        "# Prompt for the key at run time so that it is never stored in the notebook source.\n",
        "# Find your key at https://www.aicrowd.com/participants/me\n",
        "API_KEY = getpass(\"AIcrowd API key: \")\n",
        "%aicrowd login --api-key $API_KEY"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "\u001b[32mAPI Key valid\u001b[0m\n",
            "\u001b[32mSaved API Key successfully!\u001b[0m\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 300,
          "referenced_widgets": [
            "56ec863219f54a738e4751f2ff2fc224",
            "23e6231b5d2842c786c94edc1bec5e2d",
            "d60eefd729d54d7bb6573608a5239777",
            "3b7a17acfdcb486e97bf495967ca9e96",
            "82f16f77ef35467eaa9b24b8c4119f3d",
            "99c81ddf1e7b40e2a35303b07a924352",
            "bb4b8b3018404fbba6105e76c0fa71e1",
            "1b088d4d92e947649ddedbae733e2cb2",
            "4a5e2ff9b2f446568095638fedfb74dd",
            "f61f398a8a7a4c669a7bf7dbd98602ae",
            "8c45723f66744534b8de3a82870a4e31",
            "a51dc8009441496194bf27e29aad6b71",
            "2f247d747bd6452aa25939d29b77d6a2",
            "c4db15c331d345fc90be8e85ce8e6d38",
            "88903aefb9ea4788968268456fecd4fe",
            "fbea753198f04e879a33ec1fcc93cdff",
            "79e7f1397f37494fa860418164a95e8c",
            "13b16297c93c4f968c796ed772903852",
            "733c27c51c9840c6927136e087b79932",
            "36489520d13946508d422c743ebc36f6",
            "470113171b81410794f5f5b484f01575",
            "25cb24a0f9c546538cc9bd8693718746",
            "b6bb3dd94e2d47d3ada869f71b3d6c68",
            "6272412804714fb38c8273654f8734f7"
          ]
        },
        "id": "s30YU5uIAZNV",
        "outputId": "8e9231cb-d155-4457-f7eb-3bae2d18bc31"
      },
      "source": [
        "%aicrowd dataset list -c tiring-text\n",
        "%aicrowd dataset download -c tiring-text -j 3"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "              Datasets for challenge #748                                       \n",
            "┌───┬───────────────────────┬─────────────┬───────────┐                         \n",
            "│ # │ Title                 │ Description │      Size │                         \n",
            "├───┼───────────────────────┼─────────────┼───────────┤                         \n",
            "│ 0 │ sample_submission.csv │ -           │ 136.43 KB │                         \n",
            "│ 1 │ test.csv              │ -           │  10.10 MB │                         \n",
            "│ 2 │ train.csv             │ -           │  40.85 MB │                         \n",
            "└───┴───────────────────────┴─────────────┴───────────┘                         \n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "56ec863219f54a738e4751f2ff2fc224",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='sample_submission.csv', max=136432.0, style=ProgressStyle…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "4a5e2ff9b2f446568095638fedfb74dd",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='train.csv', max=40848154.0, style=ProgressStyle(descripti…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "79e7f1397f37494fa860418164a95e8c",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='test.csv', max=10099240.0, style=ProgressStyle(descriptio…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n",
            "\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "U7H2PP692wCg"
      },
      "source": [
        "\n",
        "## Import packages"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zv0sEu7z2wCg"
      },
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "import seaborn as sns\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.feature_extraction.text import CountVectorizer\n",
        "from sklearn.feature_extraction.text import TfidfTransformer\n",
        "from sklearn.naive_bayes import MultinomialNB\n",
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn.neural_network import MLPClassifier\n",
        "from sklearn.svm import SVC\n",
        "from sklearn.tree import DecisionTreeClassifier \n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score,log_loss"
      ],
      "execution_count": 20,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Pc0brLwc2wCi"
      },
      "source": [
        "## Load Data\n",
        "- We use pandas 🐼 library to load our data.   \n",
        "- Pandas loads the data into DataFrames and makes it easy for us to analyse the data.   \n",
        "- Learn more about it [here](https://www.tutorialspoint.com/python_data_science/python_pandas.htm) 🤓"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Mugz2FSA2wCm"
      },
      "source": [
        "train_data = pd.read_csv(\"train.csv\")"
      ],
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "NMtGgNBv2wCo"
      },
      "source": [
        "## Visualize the data 👀"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mEKcS2Lr2wCp",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        },
        "outputId": "fc062778-f312-4e3f-a7d8-c1e8ce5b4970"
      },
      "source": [
        "train_data.head()"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>text</th>\n",
              "      <th>tag</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>. CHIEF JUSTICE ROBERTS , JUSTICE PRYOR . JUST...</td>\n",
              "      <td>news</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>want this pawn like not even to exist because ...</td>\n",
              "      <td>chess</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>? If I was n't already at that URL , where mig...</td>\n",
              "      <td>programming</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>a power is 3x squared the derivative of positi...</td>\n",
              "      <td>math</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Knight to f2 check and the White King has no m...</td>\n",
              "      <td>chess</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                                                text          tag\n",
              "0  . CHIEF JUSTICE ROBERTS , JUSTICE PRYOR . JUST...         news\n",
              "1  want this pawn like not even to exist because ...        chess\n",
              "2  ? If I was n't already at that URL , where mig...  programming\n",
              "3  a power is 3x squared the derivative of positi...         math\n",
              "4  Knight to f2 check and the White King has no m...        chess"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "lfzexz9yATFA",
        "outputId": "141cf760-f9fc-407b-d8f6-ef7a0ae151ff"
      },
      "source": [
        "train_data.shape"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(79376, 2)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "bubtKctQ2wCr"
      },
      "source": [
        "The dataset contains text snippets along with a `tag` label naming the topic of each snippet (for example `news`, `chess`, `programming` or `math`)."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XcgFJtIq2wCx",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "fea19cc7-05aa-4239-8cd4-e70ded5f7025"
      },
      "source": [
        "X,y = train_data['text'],train_data['tag']\n",
        "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)\n",
        "print(X_train.shape)\n",
        "print(X_val.shape)\n",
        "print(y_train.shape)"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "(63500,)\n",
            "(15876,)\n",
            "(63500,)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "QQp1N8J5mgne"
      },
      "source": [
        "# TRAINING PHASE 🏋️"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ZVnSWiiDy_-J"
      },
      "source": [
        "## Preprocessing\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "kJ4ygkVIzPLU"
      },
      "source": [
        "Text files are actually (ordered) series of words. In order to run machine learning algorithms, we need to convert the text files into numerical feature vectors. We will be using the `bag of words` model for our example. Briefly, we segment each text file into words (for English, splitting by space), count the number of times each word occurs in each document, and finally assign each word an integer id. Each unique word in our dictionary will correspond to a feature (descriptive feature).\n",
        "\n",
        "Scikit-learn has a high-level component, `CountVectorizer`, which will create feature vectors for us. More about it [here](https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html)."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "c1eMWafzzmRb",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "b64b6d25-3c9b-43c8-e36b-532608d7bcd4"
      },
      "source": [
        "count_vect = CountVectorizer()\n",
        "X_train_counts = count_vect.fit_transform(X_train)\n",
        "X_train_counts.shape"
      ],
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(63500, 86505)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "dM_Mu07-ztyX"
      },
      "source": [
        "Here, by calling `count_vect.fit_transform(X_train)`, we learn the vocabulary dictionary and get back a document-term matrix of shape `[n_samples, n_features]`."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "25d4q7_oz2Ob"
      },
      "source": [
        "**TF**: Just counting the number of words in each document has one issue: it gives more weight to longer documents than to shorter ones. To avoid this, we can use frequencies (TF - Term Frequencies), i.e. `#count(word) / #Total words`, in each document.\n",
        "\n",
        "**TF-IDF**: Finally, we can also reduce the weight of very common words (the, is, an, etc.) which occur in almost all documents. This is called `TF-IDF`, i.e. Term Frequency times Inverse Document Frequency."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "sBzylNUPz2Ba",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "9760b7f4-9af4-46b8-f6f1-5cf2b7153290"
      },
      "source": [
        "tfidf_transformer = TfidfTransformer()\n",
        "X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)\n",
        "X_train_tfidf.shape"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(63500, 86505)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 10
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "bnbKkJQ5DcBP",
        "outputId": "bb878343-2dc1-46ed-bd40-1d8446be1f43",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "source": [
        "LABEL_LIST = train_data['tag'].unique()\r\n",
        "print(LABEL_LIST)"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "['news' 'chess' 'programming' 'math' 'food' 'fitness' 'tech' 'wildlife']\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0AnLgnKeXNv7",
        "outputId": "dd6d7a93-1254-4268-b913-70f5bb203645",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 296
        }
      },
      "source": [
        "# Pass the labels as `x=`; newer seaborn releases treat a positional argument as `data`, not `x`.\n",
        "sns.countplot(x=y_train)"
      ],
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7fef14ef6e90>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 21
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEGCAYAAACkQqisAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAarklEQVR4nO3de5QlZX3u8e/jAOKV4TLhIIMZohMNerxOkCMJGnQhEhWWYoLROCjJmARvibngMSfghURiDFGJJhgIYIiIRAURxQkX4Si3GSHAgMosUBmCMjKAogfiwO/8UW/LTts9NDW99562v5+19uqqt97a9avdu/rZddnVqSokSerjYeMuQJI0dxkikqTeDBFJUm+GiCSpN0NEktTbVuMuYNR22mmnWrJkybjLkKQ5ZfXq1d+rqkWT2+ddiCxZsoRVq1aNuwxJmlOSfGuqdg9nSZJ6M0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6m3ffWNdw7f2hvcddAl9+05fHXYI0b8z7EHn2n5wy7hJY/b7XjrsESerFw1mSpN4MEUlSb4aIJKk3Q0SS1JshIknqzRCRJPVmiEiSejNEJEm9GSKSpN4MEUlSb4aIJKm3oYVIkhOT3Jbk2oG29yX5WpKrk3w6ycKBaW9PsjbJ15O8aKB9/9a2NskRA+27J7mstX8iyTbDWhdJ0tSGuSdyErD/pLaVwFOr6mnAN4C3AyTZAzgEeEqb58NJFiRZAPw98GJgD+BVrS/AMcCxVfVE4A7gsCGuiyRpCkMLkaq6CNgwqe2LVbWxjV4KLG7DBwKnVdW9VXUTsBbYsz3WVtWNVfVfwGnAgUkC7Auc0eY/GThoWOsiSZraOM+JvB74fBveFbh5YNq61jZd+47AnQOBNNEuSRqhsYRIkncAG4FTR7S8FUlWJVm1fv36USxSkuaFkYdIkkOBlwCvrqpqzbcAuw10W9zapmu/HViYZKtJ7VOqquOrallVLVu0aNGsrIckacQhkmR/4E+Bl1XVjwYmnQUckuThSXYHlgKXA1cAS9uVWNvQnXw/q4XPBcDBbf7lwJmjWg9JUmeYl/h+HLgEeFKSdUkOA44DHgOsTHJVkn8AqKo1wOnAdcAXgMOr6r52zuONwLnA9cDprS/AnwF/lGQt3TmSE4a1LpKkqQ3tf6xX1aumaJ72D31VHQ0cPUX7OcA5U7TfSHf1liRpTPzGuiSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpt6H9Z0PNrm+/63+OuwQe/xfXjLsESVsY90QkSb0ZIpKk3gwRSVJvhogkqTdDRJLU29BCJMmJSW5Lcu1A2w5JVia5of3cvrUnyQeTrE1ydZJnDcyzvPW/IcnygfZnJ7mmzfPBJBnWukiSpjbMPZGTgP0ntR0BnFdVS4Hz2jjAi4Gl7bEC+Ah0oQMcCTwH2BM4ciJ4Wp/fHZhv8rIkSUM2tBCpqouADZOaDwRObsMnAwcNtJ9SnUuBhUl2AV4ErKyqDVV1B7AS2L9Ne2xVXVpVBZwy8FySpBEZ9TmRnavq1jb8HWDnNrwrcPNAv3WtbVPt66Zon1KSFUlWJVm1fv36zVsDSdJPjO3EetuDqBEt6/iqWlZVyxYtWjSKRUrSvDDqEPluOxRF+3lba78F2G2g3+LWtqn2xVO0S5JGaNQhchYwcYXVcuDMgfbXtqu09gLuaoe9zgX2S7J9O6G+H3Bum/b9JHu1q7JeO/BckqQRGdoNGJN8HHg+sFOSdXRXWb0XOD3JYcC3gN9o3c8BDgDWAj8CXgdQVRuSvBu4ovV7V1VNnKz/A7orwB4BfL49JEkjNLQQqapXTTPpBVP0LeDwaZ7nRODEKdpXAU/dnBolSZ
vHb6xLknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvQ3ttifSlupL+zxv3CUA8LyLvjTuEqTN5p6IJKk3Q0SS1JshIknqzRCRJPVmiEiSejNEJEm9GSKSpN4MEUlSb4aIJKk3Q0SS1JshIknqzRCRJPU2lhBJ8odJ1iS5NsnHk2ybZPcklyVZm+QTSbZpfR/exte26UsGnuftrf3rSV40jnWRpPls5CGSZFfgzcCyqnoqsAA4BDgGOLaqngjcARzWZjkMuKO1H9v6kWSPNt9TgP2BDydZMMp1kaT5bly3gt8KeESSHwOPBG4F9gV+q00/GTgK+AhwYBsGOAM4Lkla+2lVdS9wU5K1wJ7AJSNaB2mojnvbZ8ddAgBvfP9Lx12CtmAj3xOpqluAvwG+TRcedwGrgTuramPrtg7YtQ3vCtzc5t3Y+u842D7FPP9NkhVJViVZtX79+tldIUmax8ZxOGt7ur2I3YHHAY+iOxw1NFV1fFUtq6plixYtGuaiJGleGceJ9RcCN1XV+qr6MfApYG9gYZKJw2uLgVva8C3AbgBt+nbA7YPtU8wjSRqBcYTIt4G9kjyyndt4AXAdcAFwcOuzHDizDZ/VxmnTz6+qau2HtKu3dgeWApePaB0kSYzhxHpVXZbkDOCrwEbgSuB44HPAaUne09pOaLOcAHysnTjfQHdFFlW1JsnpdAG0ETi8qu4b6cpI0jw3lquzqupI4MhJzTfSXV01ue89wCuneZ6jgaNnvUBJ0oz4jXVJUm8zCpEk582kTZI0v2zycFaSbem+DLhTuzQ3bdJjmeY7GZKk+ePBzom8AXgr3fc5VvNAiHwfOG6IdUmS5oBNhkhVfQD4QJI3VdWHRlSTJGmOmNHVWVX1oSTPBZYMzlNVpwypLknSHDCjEEnyMeAJwFXAxHcxCjBEJGkem+n3RJYBe7RvikuSBMz8eyLXAv9jmIVIkuaeme6J7ARcl+Ry4N6Jxqp62VCqkiTNCTMNkaOGWYQkaW6a6dVZXxp2IZKkuWemV2f9gO5qLIBtgK2BH1bVY4dVmCRpyzfTPZHHTAwP/H/zvYZVlCRpbnjId/GtzmeAFw2hHknSHDLTw1kvHxh9GN33Ru4ZSkWSpDljpldnvXRgeCPwTbpDWpKkeWym50ReN+xCJElzz0z/KdXiJJ9Oclt7/FuSxcMuTpK0ZZvpifV/Bs6i+78ijwM+29okSfPYTENkUVX9c1VtbI+TgEVDrEuSNAfMNERuT/KaJAva4zXA7cMsTJK05ZtpiLwe+A3gO8CtwMHAoUOqSZI0R8w0RN4FLK+qRVX1c3Sh8s6+C02yMMkZSb6W5Pok/yvJDklWJrmh/dy+9U2SDyZZm+TqJM8aeJ7lrf8NSZb3rUeS1M9MQ+RpVXXHxEhVbQCeuRnL/QDwhap6MvB04HrgCOC8qloKnNfGAV4MLG2PFcBHAJLsABwJPAfYEzhyIngkSaMx0xB52OAf6PYHfKZfVPxvkmwH7AOcAFBV/1VVd9J9efHk1u1k4KA2fCBwSrvdyqXAwiS70N12ZWVVbWgBtxLYv09NkqR+ZhoE7wcuSfLJNv5K4Oiey9wdWA/8c5KnA6uBtwA7V9Wtrc93gJ3b8K7AzQPzr2tt07X/lCQr6PZiePzjH9+zbEnSZDPaE6mqU4CXA99tj5dX1cd6LnMr4FnAR6rqmcAPeeDQ1cTyigduPb/Zqur4qlpWVcsWLfLKZEmaLTM+JFVV1wHXzcIy1wHrquqyNn4GXYh8N8kuVXVrO1x1W5t+C7DbwPyLW9stwPMntV84C/VJkmboId8KfnNV1XeAm5M8qTW9gC6czgImrrBaDpzZhs8CXtuu0toLuKsd9joX2C/J9u18zX6tTZI0Ir1Ojs+CNwGnJtkGuBF4HV2gnZ7kMOBbdN9LATgHOABYC/yo9aWqNiR5N3BF6/eudtWYJGlExhIiVXUV3f8kmewFU/Qt4PBpnu
dE4MTZrU6SNFMjP5wlSfrZYYhIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1ZohIknozRCRJvRkikqTeDBFJUm+GiCSpN0NEktTbuP4plSSN1PVHnz/uEvild+w77hJmnXsikqTeDBFJUm+GiCSpN0NEktSbISJJ6s0QkST1NrYQSbIgyZVJzm7juye5LMnaJJ9Isk1rf3gbX9umLxl4jre39q8nedF41kSS5q9x7om8Bbh+YPwY4NiqeiJwB3BYaz8MuKO1H9v6kWQP4BDgKcD+wIeTLBhR7ZIkxhQiSRYDvw78UxsPsC9wRutyMnBQGz6wjdOmv6D1PxA4raruraqbgLXAnqNZA0kSjG9P5O+APwXub+M7AndW1cY2vg7YtQ3vCtwM0Kbf1fr/pH2Kef6bJCuSrEqyav369bO5HpI0r408RJK8BLitqlaPaplVdXxVLauqZYsWLRrVYiXpZ9447p21N/CyJAcA2wKPBT4ALEyyVdvbWAzc0vrfAuwGrEuyFbAdcPtA+4TBeSRJIzDyPZGqentVLa6qJXQnxs+vqlcDFwAHt27LgTPb8FltnDb9/Kqq1n5Iu3prd2ApcPmIVkOSxJZ1F98/A05L8h7gSuCE1n4C8LEka4ENdMFDVa1JcjpwHbAROLyq7ht92ZI0f401RKrqQuDCNnwjU1xdVVX3AK+cZv6jgaOHV6EkaVP8xrokqbct6XCWpDno6Ncc/OCdRuAd/3LGg3fSrHNPRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb0ZIpKk3gwRSVJvhogkqTdDRJLUmyEiSerNEJEk9WaISJJ6M0QkSb2NPESS7JbkgiTXJVmT5C2tfYckK5Pc0H5u39qT5INJ1ia5OsmzBp5reet/Q5Llo14XSZrvxrEnshF4W1XtAewFHJ5kD+AI4LyqWgqc18YBXgwsbY8VwEegCx3gSOA5wJ7AkRPBI0kajZGHSFXdWlVfbcM/AK4HdgUOBE5u3U4GDmrDBwKnVOdSYGGSXYAXASurakNV3QGsBPYf4apI0rw31nMiSZYAzwQuA3auqlvbpO8AO7fhXYGbB2Zb19qma59qOSuSrEqyav369bNWvyTNd2MLkSSPBv4NeGtVfX9wWlUVULO1rKo6vqqWVdWyRYsWzdbTStK8N5YQSbI1XYCcWlWfas3fbYepaD9va+23ALsNzL64tU3XLkkakXFcnRXgBOD6qvrbgUlnARNXWC0Hzhxof227Smsv4K522OtcYL8k27cT6vu1NknSiGw1hmXuDfw2cE2Sq1rb/wbeC5ye5DDgW8BvtGnnAAcAa4EfAa8DqKoNSd4NXNH6vauqNoxmFSRJMIYQqar/C2SayS+Yon8Bh0/zXCcCJ85edZKkh8JvrEuSehvH4SxJ0jSOOuqocZfwkGpwT0SS1JshIknqzRCRJPVmiEiSejNEJEm9GSKSpN4MEUlSb4aIJKk3Q0SS1JshIknqzRCRJPVmiEiSejNEJEm9GSKSpN4MEUlSb4aIJKk3Q0SS1JshIknqzRCRJPVmiEiSejNEJEm9zfkQSbJ/kq8nWZvkiHHXI0nzyZwOkSQLgL8HXgzsAbwqyR7jrUqS5o85HSLAnsDaqrqxqv4LOA04cMw1SdK8kaoadw29JTkY2L+qfqeN/zbwnKp646R+K4AVbfRJwNdnuZSdgO/N8nPOtrlQI1jnbLPO2TWf6/z5qlo0uXGrWV7IFqmqjgeOH9bzJ1lVVcuG9fyzYS7UCNY526xzdlnnT5vrh7NuAXYbGF/c2iRJIzDXQ+QKYGmS3ZNsAxwCnDXmmiRp3pjTh7OqamOSNwLnAguAE6tqzRhKGdqhslk0F2oE65xt1jm7rHOSOX1iXZI0XnP9cJYkaYwMEUlSb4bIz4gkJ7XvzWyxkrw5yfVJTt3M5/lmkp1msZ47Jm6Zk+
Qg73owtSRfGdFynpHkgIHxo5L88RCXtzDJH/Scd6TbXZJzkixsw3c/WE1JLkyybIp5Z2VbhDl+Yl1zzh8AL6yqdeMupJmqnoOAs4HrRlFAkgVVdd9mzL9VVW2czZqmU1XPHcVygGcAy4BzRrS8hXTvhQ+PaHm9VdUBD95rRvPO2rbonsgUkixpKf3RJGuSfDHJI5I8IckXkqxOcnGSJydZkOSmdBYmuS/JPu15LkqyNMnzklzVHlcmecws1PjaJFcn+Y8kH2vN+yT5SpIbBz8dJfmTJFe0/u9sbY9K8rk2/7VJfrO1vzfJda3v32xunQM1/APwC8Dnk7wtyWfaMi5N8rTWZ4dp2ndsv4M1Sf4JyCzX84dJjkvyXOBlwPva7+oJ7ZPcMUkuT/KNJL/a5l+Q5H0Dr+sbWvsu7fd+Vbobg34ryb8muTPJXe213tCe86vAK5O8Ksk1bdoxAzUe1pZ5eXsvHtfaT0ryD0kuA/46yZ5JLmnvra8keVLrd2h7PVem23t7Y5I/av0uTbJD63dhkmOTrGrv+19O8qkkNyR5z0A9d7efz2/znJHka0lOTZI27YDWdk1bz5vbOpya5IVJvtyed8+p6k53qf67gN9sr+FvtsXv0ZZ5Y5I3b+7vf5L3Ak9oy3vfVNtLW7eptjmYZrvroy37zW342CTnt+F922v4U3vh6RzX3m//DvzcNM/9zSQ7TfHef1SSE9v77MokD+3WUVXlY9IDWAJsBJ7Rxk8HXgOcByxtbc8Bzm/DXwCeAryE7rsr7wAeDtzUpn8W2LsNPxrYajPrewrwDWCnNr4DcBLwSboPBnvQ3VMMYD+6y/3Spp0N7AO8AvjowHNuB+xId0uYiav2Fs7y6/pNutsxfAg4srXtC1zVhqdr/yDwF23414GaWPdZqudQ4LjWdhJw8ECfC4H3t+EDgH9vwyuAP2/DDwdWAbsDbwPe0dp/odX6emAlcCLwx8C3gT9tfR7XxhfRHRk4n25v6HGtvh2ArYGLJ9V4NrCgjT924j0FvBD4tzZ8KLAWeEx7/ruA32vTjgXeOrCOx7ThtwD/CezS1msdsGObdnf7+fz2XIvbe+oS4FeAbYGb2+uwBLgf+FLrs7qtf+jub/eZB6n7uIHfwVHAV1o9OwG3A1vP8vZ+7YNsLz+1zQ38Ln5qu9uMWvYCPtmGLwYub7//I4E3tPfERA0Tv4+X072/FrT3zZ2093D73S4bfL9PMfyXwGsmtvm2no+aac0ezpreTVV1VRteTfdGey7wyfahC7o3NXS/7H3oNp6/An6XbuO5ok3/MvC36Y4/fqo2fxdyX7o32vcAqmpDq+kzVXU/cF2SnVvf/drjyjb+aGBpq/n97ZPv2VV1cZKtgHuAE5KcTbcBDcOv0IUYVXV+uj2Nx26ifR+6DYWq+lySO4ZU13Q+1X5OvA+ge02fNvDJczu61/UK4MQkW9P9AbgZ+DTdB4u76dbjfuATbb5fBi6sqvUA7T2yT5v2para0No/CfziQE2frAcOg20HnJxkKV1obT3Q74Kq+gHwgyR30X2gAbgGeNpAv7MG2tdU1a1tuTfS3RXi9kmvyeUT7+MkV7XX5W7gxqq6KckS4DvAD6rq/iRrgPOqqpJc0/pvqu7JPldV9wL3JrkN2Jku4GbbdNvL05m0zQ3MM9V219dq4NntfX8v8FW6Q3u/CrwZePsU8+wDfLy9H/5zYu/lIdgPeFkeOO+0LfB44PqZzOzhrOndOzB8H90nwjur6hkDj19q0y+i+yXvSXccdyHdp7WLAarqvcDvAI8AvpzkySOoOQM//2qg5idW1QlV9Q3gWXR/NN6T5C+qO7a+J3AG3V7VF4ZU51wz8brexwPnEQO8aeB13b2qvlhVF9Ft1LcAfwM8sqruoPsjNPHHdkfgh5tZ0+D876YLi6cCL6X7IzC5dujC696B4a2m6Hf/FPNM9WFz8vYxVZ8fP8iyN1V3n+XNhim3lweZZ6rtrpeq+jFwE93e2F
fo/ob8GvBEZvhHvYcArxhY58dX1YyXZYjM3PeBm5K8En5yHPLpbdrldHsp91fVPXR/LN5AFy4keUJVXVNVx9B9Ut3cEDmf7lj6ju35d9hE33OB1yd5dOu7a5KfS/I44EdV9S/A+4BntT7bVdU5wB/S/eEbhouBV7d6ng98r6q+v4n2i4Dfau0vBrYfUl0AP6A7/PNgzgV+v+1xkOQX27Hlnwe+W1UfpfvXBDsm2Z9uW9sNOJUH9mChe+88rx2rXgC8igf2Yp+XZPu2h/iKTdSyHQ/cM+7QGa7nMHwd+IW2FwJdXZsyXd0z/R3MlsHlTbm98NC2uc11Md1hz4va8O8BV1Y73jSFi+jOIS1Isgtd6DwU5wJvGjiv9cyHMrOHsx6aVwMfSfLndLvepwH/UVX3JrkZuLT1u5juj8E1bfytSX6N7hPYGuDzm1NEVa1JcjTwpST38cCu91R9v5jkl4BL2nvkbrrzO0+kO4F8P90nxt+n25DOTLIt3aeTP9qcOjfhKLpDPlcDPwKWP0j7O4GPt0MiX6E7hzAspwEfbSc3N3WS9J/o9iq+2ja+9XTnMp4P/EmSH9O9rjcCb6Q7pHUv8C3gJ4fjqurWdJcXX0D3mn+uqs4ESPKXdCGzAfga3XmIqfw13WGhPwc+99BXeXZU1f9Ld6nsF+jW/X6mrxmmr/sC4Ih2mOyvhlXvhKq6Pd0J/2vpts1/ZdL2Ms02d+iQSrqY7vDnJVX1wyT3tLbpfJruEPd1dNvGJQ9xee8G/g64OsnD6PaEXjLTmb3tiTQk7RP52e1wTZ/5H11Vd7c9kU/T3Rvu07NY4qwbqDl0/3X0hqo6dtx1aXg8nCVtuY5qn8avpft0+Jkx1zMTv9tqXkN3uOofx1yPhsw9EUlSb+6JSJJ6M0QkSb0ZIpKk3gwRaUSyGXeLlbZUhog0OhN3i5V+ZvhlQ2l0fnK3WLov1D2N7tv3W9PdzHHiS4b/h+4Loevp7r21uqpm7Y7K0mwyRKTROQJ4alU9o32B8JFV9f10t/a+NMlZdDfbewXdLWe2prsB3+qxVSw9CENEGo8Af5nuf8/cD+xKd2favYEz2z3Y7kny2U08hzR2hog0Hq+m+x8fz66qHyf5Jpu+i620RfLEujQ6g3eL3Q64rQXIrwE/39q/DLw0ybbtTrIzvhGeNA7uiUgjMulusVcAT27/oGkV3V16qaor2rmRq4Hv0t0JelN3wpXGyntnSVuYgTvhPpLuf0WsqKqvjrsuaSruiUhbnuOT7EF3juRkA0RbMvdEJEm9eWJdktSbISJJ6s0QkST1ZohIknozRCRJvf1/QNVdvlHCXKcAAAAASUVORK5CYII=\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "fygOQDu72wCz"
      },
      "source": [
        "## Define the Model\n",
        "\n",
        "- We have fixed our data and now we are ready to train our model.   \n",
        "\n",
        "- There are a ton of classifiers to choose from some being [Naive Bayes](https://scikit-learn.org/stable/modules/naive_bayes.html#naive-bayes), [Logistic Regression](https://towardsdatascience.com/logistic-regression-detailed-overview-46c4da4303bc), [SVM](https://towardsdatascience.com/support-vector-machine-introduction-to-machine-learning-algorithms-934a444fca47), [Random Forests](https://towardsdatascience.com/support-vector-machine-introduction-to-machine-learning-algorithms-934a444fca47), [Decision Trees](https://towardsdatascience.com/decision-trees-in-machine-learning-641b9c4e8052), etc.🧐         \n",
        "\n",
        "- Remember that there are no hard-laid rules here. you can mix and match classifiers, it is advisable to read up on the numerous techniques and choose the best fit for your solution , experimentation is the key.     \n",
        "   \n",
        "- A good model does not depend solely on the classifier but also on the features you choose. So make sure to analyse and understand your data well and move forward with a clear view of the problem at hand.  you can gain important insight from [here](https://towardsdatascience.com/the-5-feature-selection-algorithms-every-data-scientist-need-to-know-3a6b566efd2).🧐         "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "KAqFWkmP0w7U"
      },
      "source": [
        "classifier = DecisionTreeClassifier(max_depth = 2)"
      ],
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "QqBfqFeuQclG"
      },
      "source": [
        "from sklearn.multioutput import MultiOutputClassifier\r\n",
        "from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB"
      ],
      "execution_count": 24,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mYz7OucfUJxh"
      },
      "source": [
        "!pip install -q ktrain"
      ],
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XfWRnUX8Nr7d"
      },
      "source": [
        "\r\n",
        "import ktrain"
      ],
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Sl1D6c8tVxFi"
      },
      "source": [
        "classifier2 = MultinomialNB()"
      ],
      "execution_count": 26,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yQ_Odn_Ydhq8"
      },
      "source": [
        "text_clf = Pipeline([('vect', CountVectorizer(stop_words='english')),\r\n",
        "                      ('tfidf', TfidfTransformer()),\r\n",
        "                      ('clf', classifier2)])"
      ],
      "execution_count": 27,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "mlJ1hiuTVZl6",
        "outputId": "78b0a8e2-0eec-4232-aa39-0ff828a066ac",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 358,
          "referenced_widgets": [
            "90d3e52efacd45c9827ce735f1cd0e85",
            "84ba412f0ea04a4aaded73fc67772907",
            "f5dbbf7d9eb54e01ba5bd93797f8c9a4",
            "bb2208c383f34a15b7ea2082255fd30e",
            "ada95e53ecdb44fc811a51684aa841af",
            "8a9b75d84ac44178bf152003ed9d0ebb",
            "55970aa3a0b34a5aba102debe970c2a0",
            "9139c047bc3241fb9f415388c3fe4d87"
          ]
        }
      },
      "source": [
        ""
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/ktrain/text/preprocessor.py:414: UserWarning: The class_names argument is replacing the classes argument. Please update your code.\n",
            "  warnings.warn('The class_names argument is replacing the classes argument. Please update your code.')\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "90d3e52efacd45c9827ce735f1cd0e85",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=442.0, style=ProgressStyle(description_…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "╭───────────────────── Traceback (most recent call last) ──────────────────────╮\n",
            "│ /usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882 │\n",
            "│ in run_code                                                                  │\n",
            "│                                                                              │\n",
            "│   2879 │   │   │   try:                                                      │\n",
            "│   2880 │   │   │   │   self.hooks.pre_run_code_hook()                        │\n",
            "│   2881 │   │   │   │   #rprint('Running code', repr(code_obj)) # dbg         │\n",
            "│ ❱ 2882 │   │   │   │   exec(code_obj, self.user_global_ns, self.user_ns)     │\n",
            "│   2883 │   │   │   finally:                                                  │\n",
            "│   2884 │   │   │   │   # Reset our crash handler in place                    │\n",
            "│   2885 │   │   │   │   sys.excepthook = old_excepthook                       │\n",
            "│ <ipython-input-17-e47b68e9d5d1>:3 in <module>                                │\n",
            "╰──────────────────────────────────────────────────────────────────────────────╯\n",
            "NameError: name 'train_text' is not defined\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "UfKPrvQu2wC2"
      },
      "source": [
        "- To start you off, We have used a basic [Decision Tree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html) classifier here.     \n",
        "- Do keep in mind there exist sophisticated techniques for everything, the key as quoted earlier is to search them and experiment to fit your implementation."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "CpFNFIw12wC2"
      },
      "source": [
        "## Train the Model"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "lpzwdomq2b30"
      },
      "source": [
        "**Building a pipeline**: We can write less code and do all of the above, by building a pipeline as follows:"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xiO9VwimvAKa"
      },
      "source": [
        "text_clf = Pipeline([('vect', CountVectorizer(stop_words='english')),\n",
        "                      ('tfidf', TfidfTransformer()),\n",
        "                      ('clf', classifier2)])\n",
        "text_clf = text_clf.fit(X_train, y_train)"
      ],
      "execution_count": 28,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "8nfBmrjI2mub"
      },
      "source": [
        "Tip: To Improve your accuracy you can do something called stemming. `Stemming` is the process of reducing inflected (or sometimes derived) words to their word stem, base or root form. E.g. A stemming algorithm reduces the words “fishing”, “fished”, and “fisher” to the root word, “fish”.\n",
        "\n",
        "You can use NLTK which can be installed from [here](http://www.nltk.org/). NLTK comes with various stemmers  which can help reducing the words to their root form. "
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RDzimyBPm1iK"
      },
      "source": [
        "# Validation Phase 🤔\n",
        "Wonder how well your model learned! Lets check it."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "91NtIX202wC5"
      },
      "source": [
        "## Predict on Validation\n",
        "\n",
        "Now we predict using our trained model on the validation set we created and evaluate our model on unforeseen data."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "wv7jeU3W2wC6",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "3c03072f-4fd9-4a41-c601-2c56700605bf"
      },
      "source": [
        "y_pred = text_clf.predict(X_val)\n",
        "print(y_pred)"
      ],
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "['food' 'programming' 'news' ... 'fitness' 'fitness' 'tech']\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5z-zEWhz2wC8"
      },
      "source": [
        "## Evaluate the Performance\n",
        "\n",
        "- We have used basic metrics to quantify the performance of our model.  \n",
        "- This is a crucial step, you should reason out the metrics and take hints to improve aspects of your model.\n",
        "- Do read up on the meaning and use of different metrics. there exist more metrics and measures, you should learn to use them correctly with respect to the solution,dataset and other factors. \n",
        "- [F1 score](https://en.wikipedia.org/wiki/F1_score) is the metric for this challenge"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "kuDZ5eIl2wC8"
      },
      "source": [
        "precision = precision_score(y_val,y_pred,average='micro')\n",
        "recall = recall_score(y_val,y_pred,average='micro')\n",
        "accuracy = accuracy_score(y_val,y_pred)\n",
        "f1 = f1_score(y_val,y_pred,average='macro')"
      ],
      "execution_count": 30,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7My62Dvw2wC-",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "52a353bc-b06f-4dff-9886-d3dadaf8a63a"
      },
      "source": [
        "print(\"Accuracy of the model is :\" ,accuracy)\n",
        "print(\"Recall of the model is :\" ,recall)\n",
        "print(\"Precision of the model is :\" ,precision)\n",
        "print(\"F1 score of the model is :\" ,f1)"
      ],
      "execution_count": 31,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Accuracy of the model is : 0.9193121693121693\n",
            "Recall of the model is : 0.9193121693121693\n",
            "Precision of the model is : 0.9193121693121693\n",
            "F1 score of the model is : 0.8096189343429978\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "dzALs-Qg2wDB"
      },
      "source": [
        "# Testing Phase 😅\n",
        "\n",
        "We are almost done. We trained and validated on the training data. Now its the time to predict on test set and make a submission."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Xhi38sIB2wDB"
      },
      "source": [
        "## Load Test Set\n",
        "\n",
        "Load the test data on which final submission is to be made."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "pkaRiDDe2wDC",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "9b8ec131-e00b-4974-bfb5-1a34319ea767"
      },
      "source": [
        "final_test_path = \"test.csv\"\n",
        "final_test = pd.read_csv(final_test_path)\n",
        "len(final_test)"
      ],
      "execution_count": 32,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "19844"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 32
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "UowX6JDt2wDG"
      },
      "source": [
        "## Predict Test Set\n",
        "Predict on the test set and you are all set to make the submission !"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XPSnS_tJ2wDH"
      },
      "source": [
        "submission = text_clf.predict(final_test['text'])"
      ],
      "execution_count": 33,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "CzNzVf_p2wDI"
      },
      "source": [
        "## Save the prediction to csv"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xkYnHOAh2wDL"
      },
      "source": [
        "submission = pd.DataFrame(submission)\n",
        "submission.to_csv('submission.csv',header=['tag'], index=False)"
      ],
      "execution_count": 34,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "VlYQsWiX2wDM"
      },
      "source": [
        "🚧 Note :    \n",
        "- Do take a look at the submission format.   \n",
        "- The submission file should contain a header.   \n",
        "- Follow all submission guidelines strictly to avoid inconvenience."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "g4p8bF7Z2wDN",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "4db1b8d9-8e70-4328-df82-f7e7d5d7afd2"
      },
      "source": [
        "%aicrowd submission create -c tiring-text -f submission.csv # submit the csv\r\n",
        "%aicrowd submission create --help"
      ],
      "execution_count": 37,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Usage: %aicrowd submission create [OPTIONS]\n",
            "Try '%aicrowd submission create --help' for help.\n",
            "\n",
            "Error: Got unexpected extra arguments (# submit the csv)\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "Usage: %aicrowd submission create [OPTIONS]\n",
            "\n",
            "  Create a submission on AIcrowd\n",
            "\n",
            "Options:\n",
            "  -c, --challenge TEXT    Specify challenge explicitly\n",
            "  -f, --file PATH         The file to submit\n",
            "  -d, --description TEXT  Description\n",
            "  --jupyter               Bundle jupyter notebook\n",
            "  --help                  Show this message and exit.\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}