renaud/farm_en_ner.ipynb

## farm_en_ner.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "FARM_en_NER.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "mount_file_id": "1qqewywPJDcrT4vTNBbkEJGO6ZUHievNO",
      "authorship_tag": "ABX9TyNUVWmo35Cs8dOpEfoZBRt6",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "1b39397a3b7c4f46af0f78c302af8b73": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_1954a748853b45018b25be2dccca4e80",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_c60b9896692f41d0ac6f25bf6aa4ea24",
              "IPY_MODEL_9855be6e35d047b1806ca63798ef123e"
            ]
          }
        },
        "1954a748853b45018b25be2dccca4e80": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "c60b9896692f41d0ac6f25bf6aa4ea24": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "IntProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_7c9049195247414fb19297ab266e1539",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "IntProgressModel",
            "bar_style": "success",
            "max": 213450,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 213450,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_7d7eb476372e4a3bb7f47b25431a6150"
          }
        },
        "9855be6e35d047b1806ca63798ef123e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_bbbd1a43688549968fd2ce8a924e403b",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 213k/213k [00:00&lt;00:00, 1.80MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_1d0391e4ee324d25a8cf93f4758fd62f"
          }
        },
        "7c9049195247414fb19297ab266e1539": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "7d7eb476372e4a3bb7f47b25431a6150": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "bbbd1a43688549968fd2ce8a924e403b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "1d0391e4ee324d25a8cf93f4758fd62f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "aacfc007c1f5457c8f54b30ec66863b1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_40ff435523b04903aad8e65507a662d3",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_2ce03980b8eb47e3bc4def1a25f9f150",
              "IPY_MODEL_3401c4064ff449ad9b7a785f85629ec1"
            ]
          }
        },
        "40ff435523b04903aad8e65507a662d3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "2ce03980b8eb47e3bc4def1a25f9f150": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "IntProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_0b3502a0bc304f51a13df06516697210",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "IntProgressModel",
            "bar_style": "success",
            "max": 361,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 361,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_bc38e1ba9e1e47d0846c62ab55042118"
          }
        },
        "3401c4064ff449ad9b7a785f85629ec1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_58a7a998b3bd48b8b273fca4a7d628aa",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 361/361 [00:00&lt;00:00, 2.83kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_de3726de50c84a459b04df3a2e2710bd"
          }
        },
        "0b3502a0bc304f51a13df06516697210": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "bc38e1ba9e1e47d0846c62ab55042118": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "58a7a998b3bd48b8b273fca4a7d628aa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "de3726de50c84a459b04df3a2e2710bd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "897ecef9f62e46449dc5da3677f31a51": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_30ce00d9d7254e42a75f6eb1bd3aea56",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_6e04f43cc22143a49d165fa78cdebf1d",
              "IPY_MODEL_34078f982a4a49fb87d858d6b926d26c"
            ]
          }
        },
        "30ce00d9d7254e42a75f6eb1bd3aea56": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "6e04f43cc22143a49d165fa78cdebf1d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "IntProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_ee65e576985f4b9f975805df04c2e56a",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "IntProgressModel",
            "bar_style": "success",
            "max": 435779157,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 435779157,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_13d9851cad514f188a2281b5f81827e0"
          }
        },
        "34078f982a4a49fb87d858d6b926d26c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_ef25ad38764f424284acc290ab4208e1",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 436M/436M [00:07&lt;00:00, 58.3MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_17c4e90625074475a3cba8ad8f50a527"
          }
        },
        "ee65e576985f4b9f975805df04c2e56a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "13d9851cad514f188a2281b5f81827e0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "ef25ad38764f424284acc290ab4208e1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "17c4e90625074475a3cba8ad8f50a527": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/renaud/2317b0c8e6d4ced7abd8088f5594c547/farm_en_ner.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xyO45RHBgjI0",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 139
        },
        "outputId": "1565a4ae-7f82-4d8c-978a-1a5c0c2edf66"
      },
      "source": [
        "# https://github.com/deepset-ai/FARM#basic-usage\n",
        "!git clone https://github.com/deepset-ai/FARM.git"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Cloning into 'FARM'...\n",
            "remote: Enumerating objects: 85, done.\u001b[K\n",
            "remote: Counting objects: 100% (85/85), done.\u001b[K\n",
            "remote: Compressing objects: 100% (71/71), done.\u001b[K\n",
            "remote: Total 4933 (delta 47), reused 32 (delta 14), pack-reused 4848\u001b[K\n",
            "Receiving objects: 100% (4933/4933), 64.95 MiB | 24.13 MiB/s, done.\n",
            "Resolving deltas: 100% (3668/3668), done.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qXzI39Jd1SsY",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "18bd6ed9-4ccd-4114-8804-80a05843b162"
      },
      "source": [
        "cd FARM\n"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/content/FARM\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "eunTBTOW1tBb",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "7db64161-8e55-45f1-ebc2-a93f48ed548d"
      },
      "source": [
        "!pip install -r requirements.txt\n",
        "!pip install --editable .\n"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 2)) (46.1.3)\n",
            "Requirement already satisfied: wheel in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 3)) (0.34.2)\n",
            "Requirement already satisfied: torch>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 5)) (1.4.0)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 7)) (4.38.0)\n",
            "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 9)) (1.12.38)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 11)) (2.21.0)\n",
            "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 13)) (1.4.1)\n",
            "Requirement already satisfied: sklearn in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 14)) (0.0)\n",
            "Collecting seqeval\n",
            "  Downloading https://files.pythonhosted.org/packages/34/91/068aca8d60ce56dd9ba4506850e876aba5e66a6f2f29aa223224b50df0de/seqeval-0.0.12.tar.gz\n",
            "Collecting mlflow==1.0.0\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/01/ec/8c9448968d4662e8354b9c3a62e635f8929ed507a45af3d9fdb84be51270/mlflow-1.0.0-py3-none-any.whl (47.7MB)\n",
            "\u001b[K     |████████████████████████████████| 47.7MB 61kB/s \n",
            "\u001b[?25hCollecting transformers==2.7.0\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/37/ba/dda44bbf35b071441635708a3dd568a5ca6bf29f77389f7c7c6818ae9498/transformers-2.7.0-py3-none-any.whl (544kB)\n",
            "\u001b[K     |████████████████████████████████| 552kB 44.7MB/s \n",
            "\u001b[?25hCollecting dotmap==1.3.0\n",
            "  Downloading https://files.pythonhosted.org/packages/fa/eb/ee5f0358a9e0ede90308d8f34e697e122f191c2702dc4f614eca7770b1eb/dotmap-1.3.0-py3-none-any.whl\n",
            "Collecting Werkzeug==0.16.1\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/c2/e4/a859d2fe516f466642fa5c6054fd9646271f9da26b0cac0d2f37fc858c8f/Werkzeug-0.16.1-py2.py3-none-any.whl (327kB)\n",
            "\u001b[K     |████████████████████████████████| 327kB 33.7MB/s \n",
            "\u001b[?25hRequirement already satisfied: flask in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 24)) (1.1.2)\n",
            "Collecting flask-restplus\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/c2/a6/b17c848771f96ad039ad9e3ea275e842a16c39c4f3eb9f60ee330b20b6c2/flask_restplus-0.13.0-py2.py3-none-any.whl (2.5MB)\n",
            "\u001b[K     |████████████████████████████████| 2.5MB 46.2MB/s \n",
            "\u001b[?25hCollecting flask-cors\n",
            "  Downloading https://files.pythonhosted.org/packages/78/38/e68b11daa5d613e3a91e4bf3da76c94ac9ee0d9cd515af9c1ab80d36f709/Flask_Cors-3.0.8-py2.py3-none-any.whl\n",
            "Requirement already satisfied: dill in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 29)) (0.3.1.1)\n",
            "Collecting onnxruntime\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/69/39/404df5ee608c548dacde43a17faf0248b183fa6163cf9c06aca6a511d760/onnxruntime-1.2.0-cp36-cp36m-manylinux1_x86_64.whl (3.7MB)\n",
            "\u001b[K     |████████████████████████████████| 3.7MB 47.1MB/s \n",
            "\u001b[?25hRequirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->-r requirements.txt (line 9)) (0.3.3)\n",
            "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->-r requirements.txt (line 9)) (0.9.5)\n",
            "Requirement already satisfied: botocore<1.16.0,>=1.15.38 in /usr/local/lib/python3.6/dist-packages (from boto3->-r requirements.txt (line 9)) (1.15.38)\n",
            "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (1.24.3)\n",
            "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (2.8)\n",
            "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (3.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (2020.4.5.1)\n",
            "Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from scipy>=1.3.2->-r requirements.txt (line 13)) (1.18.2)\n",
            "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from sklearn->-r requirements.txt (line 14)) (0.22.2.post1)\n",
            "Requirement already satisfied: Keras>=2.2.4 in /usr/local/lib/python3.6/dist-packages (from seqeval->-r requirements.txt (line 16)) (2.3.1)\n",
            "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (2.8.1)\n",
            "Collecting simplejson\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/98/87/a7b98aa9256c8843f92878966dc3d8d914c14aad97e2c5ce4798d5743e07/simplejson-3.17.0.tar.gz (83kB)\n",
            "\u001b[K     |████████████████████████████████| 92kB 11.2MB/s \n",
            "\u001b[?25hRequirement already satisfied: sqlparse in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (0.3.1)\n",
            "Collecting querystring-parser\n",
            "  Downloading https://files.pythonhosted.org/packages/4a/fa/f54f5662e0eababf0c49e92fd94bf178888562c0e7b677c8941bbbcd1bd6/querystring_parser-1.2.4.tar.gz\n",
            "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.12.0)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (3.13)\n",
            "Requirement already satisfied: entrypoints in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (0.3)\n",
            "Collecting docker>=3.6.0\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/58/74/379a9d30b1620def158c40b88c43e01c1936a287ebb97afab0699c601c57/docker-4.2.0-py2.py3-none-any.whl (143kB)\n",
            "\u001b[K     |████████████████████████████████| 153kB 43.3MB/s \n",
            "\u001b[?25hCollecting alembic\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/60/1e/cabc75a189de0fbb2841d0975243e59bde8b7822bacbb95008ac6fe9ad47/alembic-1.4.2.tar.gz (1.1MB)\n",
            "\u001b[K     |████████████████████████████████| 1.1MB 43.5MB/s \n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "    Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (7.1.1)\n",
            "Collecting databricks-cli>=0.8.0\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/49/d1/fe0ba3d5c2b4b76ec035aa243bbc2fd0d60607a391f192ebe1656e17a4e2/databricks-cli-0.10.0.tar.gz (45kB)\n",
            "\u001b[K     |████████████████████████████████| 51kB 6.9MB/s \n",
            "\u001b[?25hRequirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (3.10.0)\n",
            "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.3.16)\n",
            "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.3.0)\n",
            "Collecting gunicorn\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/69/ca/926f7cd3a2014b16870086b2d0fdc84a9e49473c68a8dff8b57f7c156f43/gunicorn-20.0.4-py2.py3-none-any.whl (77kB)\n",
            "\u001b[K     |████████████████████████████████| 81kB 9.5MB/s \n",
            "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.0.3)\n",
            "Collecting gitpython>=2.1.0\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/19/1a/0df85d2bddbca33665d2148173d3281b290ac054b5f50163ea735740ac7b/GitPython-3.1.1-py3-none-any.whl (450kB)\n",
            "\u001b[K     |████████████████████████████████| 460kB 42.3MB/s \n",
            "\u001b[?25hCollecting sacremoses\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/99/50/93509f906a40bffd7d175f97fd75ea328ad9bd91f48f59c4bd084c94a25e/sacremoses-0.0.41.tar.gz (883kB)\n",
            "\u001b[K     |████████████████████████████████| 890kB 38.4MB/s \n",
            "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->-r requirements.txt (line 19)) (2019.12.20)\n",
            "Collecting tokenizers==0.5.2\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/d1/3f/73c881ea4723e43c1e9acf317cf407fab3a278daab3a69c98dcac511c04f/tokenizers-0.5.2-cp36-cp36m-manylinux1_x86_64.whl (3.7MB)\n",
            "\u001b[K     |████████████████████████████████| 3.7MB 41.4MB/s \n",
            "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->-r requirements.txt (line 19)) (3.0.12)\n",
            "Collecting sentencepiece\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/74/f4/2d5214cbf13d06e7cb2c20d84115ca25b53ea76fa1f0ade0e3c9749de214/sentencepiece-0.1.85-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n",
            "\u001b[K     |████████████████████████████████| 1.0MB 35.2MB/s \n",
            "\u001b[?25hRequirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->-r requirements.txt (line 19)) (0.7)\n",
            "Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from flask->-r requirements.txt (line 24)) (1.1.0)\n",
            "Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from flask->-r requirements.txt (line 24)) (2.11.1)\n",
            "Requirement already satisfied: jsonschema in /usr/local/lib/python3.6/dist-packages (from flask-restplus->-r requirements.txt (line 25)) (2.6.0)\n",
            "Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from flask-restplus->-r requirements.txt (line 25)) (2018.9)\n",
            "Collecting aniso8601>=0.82\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/eb/e4/787e104b58eadc1a710738d4e418d7e599e4e778e52cb8e5d5ef6ddd5833/aniso8601-8.0.0-py2.py3-none-any.whl (43kB)\n",
            "\u001b[K     |████████████████████████████████| 51kB 7.5MB/s \n",
            "\u001b[?25hCollecting onnx>=1.2.3\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/f5/f4/e126b60d109ad1e80020071484b935980b7cce1e4796073aab086a2d6902/onnx-1.6.0-cp36-cp36m-manylinux1_x86_64.whl (4.8MB)\n",
            "\u001b[K     |████████████████████████████████| 4.8MB 34.7MB/s \n",
            "\u001b[?25hRequirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.38->boto3->-r requirements.txt (line 9)) (0.15.2)\n",
            "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->sklearn->-r requirements.txt (line 14)) (0.14.1)\n",
            "Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->-r requirements.txt (line 16)) (1.0.8)\n",
            "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->-r requirements.txt (line 16)) (2.10.0)\n",
            "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->-r requirements.txt (line 16)) (1.1.0)\n",
            "Collecting websocket-client>=0.32.0\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/4c/5f/f61b420143ed1c8dc69f9eaec5ff1ac36109d52c80de49d66e0c36c3dfdf/websocket_client-0.57.0-py2.py3-none-any.whl (200kB)\n",
            "\u001b[K     |████████████████████████████████| 204kB 49.1MB/s \n",
            "\u001b[?25hCollecting python-editor>=0.3\n",
            "  Downloading https://files.pythonhosted.org/packages/c6/d3/201fc3abe391bbae6606e6f1d598c15d367033332bd54352b12f35513717/python_editor-1.0.4-py3-none-any.whl\n",
            "Collecting Mako\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/50/78/f6ade1e18aebda570eed33b7c534378d9659351cadce2fcbc7b31be5f615/Mako-1.1.2-py2.py3-none-any.whl (75kB)\n",
            "\u001b[K     |████████████████████████████████| 81kB 9.8MB/s \n",
            "\u001b[?25hRequirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.0->mlflow==1.0.0->-r requirements.txt (line 17)) (0.8.7)\n",
            "Collecting configparser>=0.3.5\n",
            "  Downloading https://files.pythonhosted.org/packages/4b/6b/01baa293090240cf0562cc5eccb69c6f5006282127f2b846fad011305c79/configparser-5.0.0-py3-none-any.whl\n",
            "Collecting gitdb<5,>=4.0.1\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/74/52/ca35448b56c53a079d3ffe18b1978c6e424f6d4df02404877094c89f5bfb/gitdb-4.0.4-py3-none-any.whl (63kB)\n",
            "\u001b[K     |████████████████████████████████| 71kB 10.2MB/s \n",
            "\u001b[?25hRequirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->flask->-r requirements.txt (line 24)) (1.1.1)\n",
            "Requirement already satisfied: typing-extensions>=3.6.2.1 in /usr/local/lib/python3.6/dist-packages (from onnx>=1.2.3->onnxruntime->-r requirements.txt (line 30)) (3.6.6)\n",
            "Collecting smmap<4,>=3.0.1\n",
            "  Downloading https://files.pythonhosted.org/packages/27/b1/e379cfb7c07bbf8faee29c4a1a2469dbea525f047c2b454c4afdefa20a30/smmap-3.0.2-py2.py3-none-any.whl\n",
            "Building wheels for collected packages: alembic\n",
            "  Building wheel for alembic (PEP 517) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for alembic: filename=alembic-1.4.2-cp36-none-any.whl size=159543 sha256=0891797d8c0abd6a76807cafacf33ef9e161a7f2b83f9a95724a3803bc2c7a37\n",
            "  Stored in directory: /root/.cache/pip/wheels/1f/04/83/76023f7a4c14688c0b5c2682a96392cfdd3ee4449eaaa287ef\n",
            "Successfully built alembic\n",
            "Building wheels for collected packages: seqeval, simplejson, querystring-parser, databricks-cli, sacremoses\n",
            "  Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for seqeval: filename=seqeval-0.0.12-cp36-none-any.whl size=7424 sha256=b40eb771b23d613b9bd479540b94e508183d8fce88457fc51cd63f885c644a83\n",
            "  Stored in directory: /root/.cache/pip/wheels/4f/32/0a/df3b340a82583566975377d65e724895b3fad101a3fb729f68\n",
            "  Building wheel for simplejson (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for simplejson: filename=simplejson-3.17.0-cp36-cp36m-linux_x86_64.whl size=114206 sha256=b916793fe2b66118b83b28018acf3970123a30be008b1712d502dcbc45e65d7b\n",
            "  Stored in directory: /root/.cache/pip/wheels/86/c0/83/dcd0339abb2640544bb8e0938aab2d069cef55e5647ce6e097\n",
            "  Building wheel for querystring-parser (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for querystring-parser: filename=querystring_parser-1.2.4-cp36-none-any.whl size=7079 sha256=e3706658175beab84332d9595bb8412240ba680bf5cd35d8b4c2d22bd93ea44a\n",
            "  Stored in directory: /root/.cache/pip/wheels/1e/41/34/23ebf5d1089a9aed847951e0ee375426eb4ad0a7079d88d41e\n",
            "  Building wheel for databricks-cli (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for databricks-cli: filename=databricks_cli-0.10.0-cp36-none-any.whl size=84285 sha256=f5cf7160929d401610bdbabc1b61f3596e26f43d90605c83955e8685ff2b19ad\n",
            "  Stored in directory: /root/.cache/pip/wheels/1e/e5/2d/a19c0bfd38005176063f130d72de17cb3d2d32c0ee384e7493\n",
            "  Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for sacremoses: filename=sacremoses-0.0.41-cp36-none-any.whl size=893334 sha256=4847c63d318afc68ec3491f90c2538cec9a07f8b35f525ad57f31c208e38b215\n",
            "  Stored in directory: /root/.cache/pip/wheels/22/5a/d4/b020a81249de7dc63758a34222feaa668dbe8ebfe9170cc9b1\n",
            "Successfully built seqeval simplejson querystring-parser databricks-cli sacremoses\n",
            "Installing collected packages: seqeval, simplejson, querystring-parser, websocket-client, docker, python-editor, Mako, alembic, configparser, databricks-cli, gunicorn, smmap, gitdb, gitpython, mlflow, sacremoses, tokenizers, sentencepiece, transformers, dotmap, Werkzeug, aniso8601, flask-restplus, flask-cors, onnx, onnxruntime\n",
            "  Found existing installation: Werkzeug 1.0.1\n",
            "    Uninstalling Werkzeug-1.0.1:\n",
            "      Successfully uninstalled Werkzeug-1.0.1\n",
            "Successfully installed Mako-1.1.2 Werkzeug-0.16.1 alembic-1.4.2 aniso8601-8.0.0 configparser-5.0.0 databricks-cli-0.10.0 docker-4.2.0 dotmap-1.3.0 flask-cors-3.0.8 flask-restplus-0.13.0 gitdb-4.0.4 gitpython-3.1.1 gunicorn-20.0.4 mlflow-1.0.0 onnx-1.6.0 onnxruntime-1.2.0 python-editor-1.0.4 querystring-parser-1.2.4 sacremoses-0.0.41 sentencepiece-0.1.85 seqeval-0.0.12 simplejson-3.17.0 smmap-3.0.2 tokenizers-0.5.2 transformers-2.7.0 websocket-client-0.57.0\n",
            "Obtaining file:///content/FARM\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (46.1.3)\n",
            "Requirement already satisfied: wheel in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.34.2)\n",
            "Requirement already satisfied: torch>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.4.0)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (4.38.0)\n",
            "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.12.38)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (2.21.0)\n",
            "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.4.1)\n",
            "Requirement already satisfied: sklearn in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.0)\n",
            "Requirement already satisfied: seqeval in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.0.12)\n",
            "Requirement already satisfied: mlflow==1.0.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.0.0)\n",
            "Requirement already satisfied: transformers==2.7.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (2.7.0)\n",
            "Requirement already satisfied: dotmap==1.3.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.3.0)\n",
            "Requirement already satisfied: Werkzeug==0.16.1 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.16.1)\n",
            "Requirement already satisfied: flask in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.1.2)\n",
            "Requirement already satisfied: flask-restplus in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.13.0)\n",
            "Requirement already satisfied: flask-cors in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (3.0.8)\n",
            "Requirement already satisfied: dill in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.3.1.1)\n",
            "Requirement already satisfied: onnxruntime in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.2.0)\n",
            "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->farm==0.4.2) (0.9.5)\n",
            "Requirement already satisfied: botocore<1.16.0,>=1.15.38 in /usr/local/lib/python3.6/dist-packages (from boto3->farm==0.4.2) (1.15.38)\n",
            "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->farm==0.4.2) (0.3.3)\n",
            "Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (2.8)\n",
            "Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (1.24.3)\n",
            "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (3.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (2020.4.5.1)\n",
            "Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from scipy>=1.3.2->farm==0.4.2) (1.18.2)\n",
            "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from sklearn->farm==0.4.2) (0.22.2.post1)\n",
            "Requirement already satisfied: Keras>=2.2.4 in /usr/local/lib/python3.6/dist-packages (from seqeval->farm==0.4.2) (2.3.1)\n",
            "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.3.0)\n",
            "Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.10.0)\n",
            "Requirement already satisfied: databricks-cli>=0.8.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (0.10.0)\n",
            "Requirement already satisfied: simplejson in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.17.0)\n",
            "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.12.0)\n",
            "Requirement already satisfied: gunicorn in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (20.0.4)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.13)\n",
            "Requirement already satisfied: entrypoints in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (0.3)\n",
            "Requirement already satisfied: sqlparse in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (0.3.1)\n",
            "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.3.16)\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.0.3)\n",
            "Requirement already satisfied: gitpython>=2.1.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.1.1)\n",
            "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (7.1.1)\n",
            "Requirement already satisfied: docker>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (4.2.0)\n",
            "Requirement already satisfied: alembic in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.4.2)\n",
            "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (2.8.1)\n",
            "Requirement already satisfied: querystring-parser in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.2.4)\n",
            "Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.7)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (3.0.12)\n",
            "Requirement already satisfied: tokenizers==0.5.2 in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.5.2)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (2019.12.20)\n",
            "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.1.85)\n",
            "Requirement already satisfied: sacremoses in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.0.41)\n",
            "Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from flask->farm==0.4.2) (2.11.1)\n",
            "Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from flask->farm==0.4.2) (1.1.0)\n",
            "Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from flask-restplus->farm==0.4.2) (2018.9)\n",
            "Requirement already satisfied: aniso8601>=0.82 in /usr/local/lib/python3.6/dist-packages (from flask-restplus->farm==0.4.2) (8.0.0)\n",
            "Requirement already satisfied: jsonschema in /usr/local/lib/python3.6/dist-packages (from flask-restplus->farm==0.4.2) (2.6.0)\n",
            "Requirement already satisfied: onnx>=1.2.3 in /usr/local/lib/python3.6/dist-packages (from onnxruntime->farm==0.4.2) (1.6.0)\n",
            "Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.38->boto3->farm==0.4.2) (0.15.2)\n",
            "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->sklearn->farm==0.4.2) (0.14.1)\n",
            "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->farm==0.4.2) (2.10.0)\n",
            "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->farm==0.4.2) (1.1.0)\n",
            "Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->farm==0.4.2) (1.0.8)\n",
            "Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.0->mlflow==1.0.0->farm==0.4.2) (0.8.7)\n",
            "Requirement already satisfied: configparser>=0.3.5 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.0->mlflow==1.0.0->farm==0.4.2) (5.0.0)\n",
            "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.6/dist-packages (from gitpython>=2.1.0->mlflow==1.0.0->farm==0.4.2) (4.0.4)\n",
            "Requirement already satisfied: websocket-client>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from docker>=3.6.0->mlflow==1.0.0->farm==0.4.2) (0.57.0)\n",
            "Requirement already satisfied: Mako in /usr/local/lib/python3.6/dist-packages (from alembic->mlflow==1.0.0->farm==0.4.2) (1.1.2)\n",
            "Requirement already satisfied: python-editor>=0.3 in /usr/local/lib/python3.6/dist-packages (from alembic->mlflow==1.0.0->farm==0.4.2) (1.0.4)\n",
            "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->flask->farm==0.4.2) (1.1.1)\n",
            "Requirement already satisfied: typing-extensions>=3.6.2.1 in /usr/local/lib/python3.6/dist-packages (from onnx>=1.2.3->onnxruntime->farm==0.4.2) (3.6.6)\n",
            "Requirement already satisfied: smmap<4,>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from gitdb<5,>=4.0.1->gitpython>=2.1.0->mlflow==1.0.0->farm==0.4.2) (3.0.2)\n",
            "Installing collected packages: farm\n",
            "  Running setup.py develop for farm\n",
            "Successfully installed farm\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "kBzm4YkZg6xl",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 364
        },
        "outputId": "4040f151-0e2e-4ead-b6f2-39fe7bdd56b1"
      },
      "source": [
        "import logging\n",
        "from pathlib import Path\n",
        "\n",
        "from farm.data_handler.data_silo import DataSilo\n",
        "from farm.data_handler.processor import NERProcessor\n",
        "from farm.modeling.optimization import initialize_optimizer\n",
        "from farm.infer import Inferencer\n",
        "from farm.modeling.adaptive_model import AdaptiveModel\n",
        "from farm.modeling.language_model import LanguageModel\n",
        "from farm.modeling.prediction_head import TokenClassificationHead\n",
        "from farm.modeling.tokenization import Tokenizer\n",
        "from farm.train import Trainer\n",
        "from farm.utils import set_all_seeds, MLFlowLogger, initialize_device_settings\n",
        "\n",
        "logging.basicConfig(\n",
        "    format=\"%(asctime)s - %(levelname)s - %(name)s -   %(message)s\",\n",
        "    datefmt=\"%m/%d/%Y %H:%M:%S\",\n",
        "    level=logging.INFO,\n",
        ")\n",
        "\n",
        "ml_logger = MLFlowLogger(tracking_uri=\"https://public-mlflow.deepset.ai/\")\n",
        "ml_logger.init_experiment(experiment_name=\"Public_FARM\", run_name=\"Ren_en-ner-colb2\")\n"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:11:05 - INFO - transformers.file_utils -   PyTorch version 1.4.0 available.\n",
            "04/15/2020 20:11:06 - INFO - transformers.file_utils -   TensorFlow version 2.2.0-rc2 available.\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n",
            " __          __  _                            _        \n",
            " \\ \\        / / | |                          | |       \n",
            "  \\ \\  /\\  / /__| | ___ ___  _ __ ___   ___  | |_ ___  \n",
            "   \\ \\/  \\/ / _ \\ |/ __/ _ \\| '_ ` _ \\ / _ \\ | __/ _ \\ \n",
            "    \\  /\\  /  __/ | (_| (_) | | | | | |  __/ | || (_) |\n",
            "     \\/  \\/ \\___|_|\\___\\___/|_| |_| |_|\\___|  \\__\\___/ \n",
            "  ______      _____  __  __  \n",
            " |  ____/\\   |  __ \\|  \\/  |              _.-^-._    .--.\n",
            " | |__ /  \\  | |__) | \\  / |           .-'   _   '-. |__|\n",
            " |  __/ /\\ \\ |  _  /| |\\/| |          /     |_|     \\|  |\n",
            " | | / ____ \\| | \\ \\| |  | |         /               \\  |\n",
            " |_|/_/    \\_\\_|  \\_\\_|  |_|        /|     _____     |\\ |\n",
            "                                     |    |==|==|    |  |\n",
            "|---||---|---|---|---|---|---|---|---|    |--|--|    |  |\n",
            "|---||---|---|---|---|---|---|---|---|    |==|==|    |  |\n",
            "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            " \n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "nQqRPoiahHTE",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "1b39397a3b7c4f46af0f78c302af8b73",
            "1954a748853b45018b25be2dccca4e80",
            "c60b9896692f41d0ac6f25bf6aa4ea24",
            "9855be6e35d047b1806ca63798ef123e",
            "7c9049195247414fb19297ab266e1539",
            "7d7eb476372e4a3bb7f47b25431a6150",
            "bbbd1a43688549968fd2ce8a924e403b",
            "1d0391e4ee324d25a8cf93f4758fd62f"
          ]
        },
        "outputId": "31a5af11-0846-4499-b9c9-b4de618877ea"
      },
      "source": [
        "##########################\n",
        "########## Settings\n",
        "##########################\n",
        "set_all_seeds(seed=42)\n",
        "device, n_gpu = initialize_device_settings(use_cuda=True)#, local_rank=-1, use_amp=None)\n",
        "n_epochs = 2\n",
        "# TODO gradient_accumulation_steps?\n",
        "# TODO layer_dims?\n",
        "warmup_proportion = 0.4\n",
        "batch_size = 64\n",
        "evaluate_every = 400\n",
        "lang_model =  \"bert-base-cased\"\n",
        "do_lower_case = False\n",
        "\n",
        "# 1.Create a tokenizer\n",
        "tokenizer = Tokenizer.load(\n",
        "    pretrained_model_name_or_path=lang_model, do_lower_case=do_lower_case\n",
        ")\n",
        "\n",
        "# 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset\n",
        "ner_labels = [\"[PAD]\", \"X\", \"O\", \"B-MISC\", \"I-MISC\", \"B-PER\", \"I-PER\", \"B-ORG\", \"I-ORG\", \"B-LOC\", \"I-LOC\", \"B-OTH\", \"I-OTH\"]\n",
        "\n",
        "processor = NERProcessor(\n",
        "    tokenizer=tokenizer, max_seq_len=128, data_dir=Path(\"../data/conll03-en\"), delimiter=\" \", metric=\"seq_f1\", label_list=ner_labels\n",
        ")\n",
        "\n",
        "# 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them and calculates a few descriptive statistics of our datasets\n",
        "data_silo = DataSilo(processor=processor, batch_size=batch_size)\n"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:11:30 - INFO - farm.utils -   device: cuda n_gpu: 1, distributed training: False, automatic mixed precision training: None\n",
            "04/15/2020 20:11:30 - INFO - farm.modeling.tokenization -   Loading tokenizer of type 'BertTokenizer'\n",
            "04/15/2020 20:11:30 - INFO - filelock -   Lock 139915751362176 acquired on /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1.lock\n",
            "04/15/2020 20:11:30 - INFO - transformers.file_utils -   https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp0l3ymdpt\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "1b39397a3b7c4f46af0f78c302af8b73",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(IntProgress(value=0, description='Downloading', max=213450, style=ProgressStyle(description_wid…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:11:30 - INFO - transformers.file_utils -   storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt in cache at /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n",
            "04/15/2020 20:11:30 - INFO - transformers.file_utils -   creating metadata file for /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n",
            "04/15/2020 20:11:30 - INFO - filelock -   Lock 139915751362176 released on /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1.lock\n",
            "04/15/2020 20:11:30 - INFO - transformers.tokenization_utils -   loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:11:31 - INFO - farm.data_handler.data_silo -   \n",
            "Loading data into the data silo ... \n",
            "              ______\n",
            "               |o  |   !\n",
            "   __          |:`_|---'-.\n",
            "  |__|______.-/ _ \\-----.|       \n",
            " (o)(o)------'\\ _ /     ( )      \n",
            " \n",
            "04/15/2020 20:11:31 - INFO - farm.data_handler.data_silo -   Loading train set from: ../data/conll03-en/train.txt \n",
            "04/15/2020 20:11:31 - INFO - farm.data_handler.utils -    Couldn't find ../data/conll03-en/train.txt locally. Trying to download ...\n",
            "04/15/2020 20:11:31 - INFO - farm.data_handler.utils -   downloading and extracting file conll03-en to dir /content/data\n",
            "04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo -   Got ya 1 parallel workers to convert 14041 dictionaries to pytorch datasets (chunksize = 2000)...\n",
            "04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo -    0 \n",
            "04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo -   /w\\\n",
            "04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo -   /'\\\n",
            "04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo -   \n",
            "Preprocessing Dataset ../data/conll03-en/train.txt:   0%|          | 0/14041 [00:00<?, ? Dicts/s]04/15/2020 20:11:34 - INFO - farm.data_handler.processor -   *** Show 2 random examples ***\n",
            "04/15/2020 20:11:34 - INFO - farm.data_handler.processor -   \n",
            "\n",
            "      .--.        _____                       _      \n",
            "    .'_\\/_'.     / ____|                     | |     \n",
            "    '. /\\ .'    | (___   __ _ _ __ ___  _ __ | | ___ \n",
            "      \"||\"       \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
            "       || /\\     ____) | (_| | | | | | | |_) | |  __/\n",
            "    /\\ ||//\\)   |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
            "   (/\\||/                             |_|           \n",
            "______\\||/___________________________________________                     \n",
            "\n",
            "ID: train-1283-0\n",
            "Clear Text: \n",
            " \ttext: ALKHAN-YURT , Russia 1996-08-22\n",
            " \tner_label: ['B-LOC', 'O', 'B-LOC', 'O']\n",
            "Tokenized: \n",
            " \ttokens: ['AL', '##K', '##HA', '##N', '-', 'Y', '##UR', '##T', ',', 'Russia', '1996', '-', '08', '-', '22']\n",
            " \toffsets: [0, 2, 3, 5, 6, 7, 8, 10, 12, 14, 21, 25, 26, 28, 29]\n",
            " \tstart_of_word: [True, False, False, False, False, False, False, False, True, True, True, False, False, False, False]\n",
            "Features: \n",
            " \tinput_ids: [101, 18589, 2428, 11612, 2249, 118, 162, 19556, 1942, 117, 2733, 1820, 118, 4775, 118, 1659, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tinitial_mask: [0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tner_label_ids: [1, 9, 1, 1, 1, 1, 1, 1, 1, 2, 9, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "_____________________________________________________\n",
            "04/15/2020 20:11:34 - INFO - farm.data_handler.processor -   \n",
            "\n",
            "      .--.        _____                       _      \n",
            "    .'_\\/_'.     / ____|                     | |     \n",
            "    '. /\\ .'    | (___   __ _ _ __ ___  _ __ | | ___ \n",
            "      \"||\"       \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
            "       || /\\     ____) | (_| | | | | | | |_) | |  __/\n",
            "    /\\ ||//\\)   |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
            "   (/\\||/                             |_|           \n",
            "______\\||/___________________________________________                     \n",
            "\n",
            "ID: train-252-0\n",
            "Clear Text: \n",
            " \ttext: Software Revenue 2,383 1,558 1,086 1,074\n",
            " \tner_label: ['O', 'O', 'O', 'O', 'O', 'O']\n",
            "Tokenized: \n",
            " \ttokens: ['Software', 'Revenue', '2', ',', '38', '##3', '1', ',', '55', '##8', '1', ',', '08', '##6', '1', ',', '07', '##4']\n",
            " \toffsets: [0, 9, 17, 18, 19, 21, 23, 24, 25, 27, 29, 30, 31, 33, 35, 36, 37, 39]\n",
            " \tstart_of_word: [True, True, True, False, False, False, True, False, False, False, True, False, False, False, True, False, False, False]\n",
            "Features: \n",
            " \tinput_ids: [101, 10331, 16944, 123, 117, 3383, 1495, 122, 117, 3731, 1604, 122, 117, 4775, 1545, 122, 117, 5004, 1527, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tinitial_mask: [0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tner_label_ids: [1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "_____________________________________________________\n",
            "Preprocessing Dataset ../data/conll03-en/train.txt: 100%|██████████| 14041/14041 [00:13<00:00, 1058.31 Dicts/s]\n",
            "04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo -   Loading dev set from: ../data/conll03-en/dev.txt\n",
            "04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo -   Got ya 1 parallel workers to convert 3250 dictionaries to pytorch datasets (chunksize = 650)...\n",
            "04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo -    0 \n",
            "04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo -   /w\\\n",
            "04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo -   / \\\n",
            "04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo -   \n",
            "Preprocessing Dataset ../data/conll03-en/dev.txt:   0%|          | 0/3250 [00:00<?, ? Dicts/s]04/15/2020 20:11:46 - INFO - farm.data_handler.processor -   *** Show 2 random examples ***\n",
            "04/15/2020 20:11:46 - INFO - farm.data_handler.processor -   \n",
            "\n",
            "      .--.        _____                       _      \n",
            "    .'_\\/_'.     / ____|                     | |     \n",
            "    '. /\\ .'    | (___   __ _ _ __ ___  _ __ | | ___ \n",
            "      \"||\"       \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
            "       || /\\     ____) | (_| | | | | | | |_) | |  __/\n",
            "    /\\ ||//\\)   |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
            "   (/\\||/                             |_|           \n",
            "______\\||/___________________________________________                     \n",
            "\n",
            "ID: train-259-0\n",
            "Clear Text: \n",
            " \ttext: HOUSTON AT PITTSBURGH\n",
            " \tner_label: ['B-ORG', 'O', 'B-LOC']\n",
            "Tokenized: \n",
            " \ttokens: ['H', '##O', '##US', '##TO', '##N', 'AT', 'P', '##IT', '##TS', '##B', '##UR', '##G', '##H']\n",
            " \toffsets: [0, 1, 2, 4, 6, 8, 11, 12, 14, 16, 17, 19, 20]\n",
            " \tstart_of_word: [True, False, False, False, False, True, True, False, False, False, False, False, False]\n",
            "Features: \n",
            " \tinput_ids: [101, 145, 2346, 13329, 18082, 2249, 13020, 153, 12150, 11365, 2064, 19556, 2349, 3048, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tinitial_mask: [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tner_label_ids: [1, 7, 1, 1, 1, 1, 2, 9, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "_____________________________________________________\n",
            "04/15/2020 20:11:46 - INFO - farm.data_handler.processor -   \n",
            "\n",
            "      .--.        _____                       _      \n",
            "    .'_\\/_'.     / ____|                     | |     \n",
            "    '. /\\ .'    | (___   __ _ _ __ ___  _ __ | | ___ \n",
            "      \"||\"       \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
            "       || /\\     ____) | (_| | | | | | | |_) | |  __/\n",
            "    /\\ ||//\\)   |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
            "   (/\\||/                             |_|           \n",
            "______\\||/___________________________________________                     \n",
            "\n",
            "ID: train-92-0\n",
            "Clear Text: \n",
            " \ttext: Red Star ( Yugoslavia ) beat Dinamo ( Russia ) 92-90 ( halftime\n",
            " \tner_label: ['B-ORG', 'I-ORG', 'O', 'B-LOC', 'O', 'O', 'B-ORG', 'O', 'B-LOC', 'O', 'O', 'O', 'O']\n",
            "Tokenized: \n",
            " \ttokens: ['Red', 'Star', '(', 'Yugoslavia', ')', 'beat', 'Dinamo', '(', 'Russia', ')', '92', '-', '90', '(', 'halftime']\n",
            " \toffsets: [0, 4, 9, 11, 22, 24, 29, 36, 38, 45, 47, 49, 50, 53, 55]\n",
            " \tstart_of_word: [True, True, True, True, True, True, True, True, True, True, True, False, False, True, True]\n",
            "Features: \n",
            " \tinput_ids: [101, 2156, 2537, 113, 8575, 114, 3222, 24780, 113, 2733, 114, 5556, 118, 3078, 113, 26077, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tinitial_mask: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tner_label_ids: [1, 7, 8, 2, 9, 2, 2, 7, 2, 9, 2, 2, 1, 1, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "_____________________________________________________\n",
            "Preprocessing Dataset ../data/conll03-en/dev.txt: 100%|██████████| 3250/3250 [00:03<00:00, 1028.94 Dicts/s]\n",
            "04/15/2020 20:11:48 - INFO - farm.data_handler.data_silo -   Loading test set from: ../data/conll03-en/test.txt\n",
            "04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo -   Got ya 1 parallel workers to convert 3453 dictionaries to pytorch datasets (chunksize = 691)...\n",
            "04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo -    0 \n",
            "04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo -   /w\\\n",
            "04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo -   /'\\\n",
            "04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo -   \n",
            "Preprocessing Dataset ../data/conll03-en/test.txt:   0%|          | 0/3453 [00:00<?, ? Dicts/s]04/15/2020 20:11:49 - INFO - farm.data_handler.processor -   *** Show 2 random examples ***\n",
            "04/15/2020 20:11:49 - INFO - farm.data_handler.processor -   \n",
            "\n",
            "      .--.        _____                       _      \n",
            "    .'_\\/_'.     / ____|                     | |     \n",
            "    '. /\\ .'    | (___   __ _ _ __ ___  _ __ | | ___ \n",
            "      \"||\"       \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
            "       || /\\     ____) | (_| | | | | | | |_) | |  __/\n",
            "    /\\ ||//\\)   |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
            "   (/\\||/                             |_|           \n",
            "______\\||/___________________________________________                     \n",
            "\n",
            "ID: train-573-0\n",
            "Clear Text: \n",
            " \ttext: Feyenoord 17 11 3 3 29 20 36\n",
            " \tner_label: ['B-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
            "Tokenized: \n",
            " \ttokens: ['Fe', '##ye', '##no', '##ord', '17', '11', '3', '3', '29', '20', '36']\n",
            " \toffsets: [0, 2, 4, 6, 10, 13, 16, 18, 20, 23, 26]\n",
            " \tstart_of_word: [True, False, False, False, True, True, True, True, True, True, True]\n",
            "Features: \n",
            " \tinput_ids: [101, 11907, 4980, 2728, 6944, 1542, 1429, 124, 124, 1853, 1406, 3164, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tinitial_mask: [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tner_label_ids: [1, 7, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "_____________________________________________________\n",
            "04/15/2020 20:11:49 - INFO - farm.data_handler.processor -   \n",
            "\n",
            "      .--.        _____                       _      \n",
            "    .'_\\/_'.     / ____|                     | |     \n",
            "    '. /\\ .'    | (___   __ _ _ __ ___  _ __ | | ___ \n",
            "      \"||\"       \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
            "       || /\\     ____) | (_| | | | | | | |_) | |  __/\n",
            "    /\\ ||//\\)   |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
            "   (/\\||/                             |_|           \n",
            "______\\||/___________________________________________                     \n",
            "\n",
            "ID: train-601-0\n",
            "Clear Text: \n",
            " \ttext: Bayern Munich 16 9 6 1 26 14 33\n",
            " \tner_label: ['B-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
            "Tokenized: \n",
            " \ttokens: ['Bayern', 'Munich', '16', '9', '6', '1', '26', '14', '33']\n",
            " \toffsets: [0, 7, 14, 17, 19, 21, 23, 26, 29]\n",
            " \tstart_of_word: [True, True, True, True, True, True, True, True, True]\n",
            "Features: \n",
            " \tinput_ids: [101, 23517, 6947, 1479, 130, 127, 122, 1744, 1489, 3081, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tinitial_mask: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tner_label_ids: [1, 7, 8, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "_____________________________________________________\n",
            "Preprocessing Dataset ../data/conll03-en/test.txt: 100%|██████████| 3453/3453 [00:02<00:00, 1183.91 Dicts/s]\n",
            "04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo -   Examples in train: 14041\n",
            "04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo -   Examples in dev  : 3250\n",
            "04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo -   Examples in test : 3453\n",
            "04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo -   \n",
            "04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo -   Longest sequence length observed after clipping:     128\n",
            "04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo -   Average sequence length after clipping: 21.411010611779787\n",
            "04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo -   Proportion clipped:      7.121999857560003e-05\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qJEnJbStibuG",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 448,
          "referenced_widgets": [
            "aacfc007c1f5457c8f54b30ec66863b1",
            "40ff435523b04903aad8e65507a662d3",
            "2ce03980b8eb47e3bc4def1a25f9f150",
            "3401c4064ff449ad9b7a785f85629ec1",
            "0b3502a0bc304f51a13df06516697210",
            "bc38e1ba9e1e47d0846c62ab55042118",
            "58a7a998b3bd48b8b273fca4a7d628aa",
            "de3726de50c84a459b04df3a2e2710bd",
            "897ecef9f62e46449dc5da3677f31a51",
            "30ce00d9d7254e42a75f6eb1bd3aea56",
            "6e04f43cc22143a49d165fa78cdebf1d",
            "34078f982a4a49fb87d858d6b926d26c",
            "ee65e576985f4b9f975805df04c2e56a",
            "13d9851cad514f188a2281b5f81827e0",
            "ef25ad38764f424284acc290ab4208e1",
            "17c4e90625074475a3cba8ad8f50a527"
          ]
        },
        "outputId": "0bef4321-55d7-4c6c-fb87-d9305be4268b"
      },
      "source": [
        "\n",
        "# 4. Create an AdaptiveModel\n",
        "# a) which consists of a pretrained language model as a basis\n",
        "language_model = LanguageModel.load(lang_model)\n",
        "# b) and a prediction head on top that is suited for our task => NER\n",
        "prediction_head = TokenClassificationHead(num_labels=len(ner_labels))\n",
        "\n",
        "model = AdaptiveModel(\n",
        "    language_model=language_model,\n",
        "    prediction_heads=[prediction_head],\n",
        "    embeds_dropout_prob=0.1,\n",
        "    lm_output_types=[\"per_token\"],\n",
        "    device=device,\n",
        ")\n",
        "\n",
        "# 5. Create an optimizer\n",
        "model, optimizer, lr_schedule = initialize_optimizer(\n",
        "    model=model,\n",
        "    learning_rate=5e-5,\n",
        "    schedule_opts={\"name\": \"LinearWarmup\", \"warmup_proportion\": warmup_proportion},\n",
        "    n_batches=len(data_silo.loaders[\"train\"]),\n",
        "    n_epochs=n_epochs,\n",
        "    device=device,\n",
        ")\n",
        "\n",
        "# 6. Feed everything to the Trainer, which keeps care of growing our model into powerful plant and evaluates it from time to time\n",
        "trainer = Trainer(\n",
        "    model=model,\n",
        "    optimizer=optimizer,\n",
        "    data_silo=data_silo,\n",
        "    epochs=n_epochs,\n",
        "    n_gpu=n_gpu,\n",
        "    lr_schedule=lr_schedule,\n",
        "    evaluate_every=evaluate_every,\n",
        "    device=device,\n",
        ")\n"
      ],
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:12:49 - INFO - filelock -   Lock 139915603172432 acquired on /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e.lock\n",
            "04/15/2020 20:12:49 - INFO - transformers.file_utils -   https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp0axpxuxr\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "aacfc007c1f5457c8f54b30ec66863b1",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(IntProgress(value=0, description='Downloading', max=361, style=ProgressStyle(description_width=…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:12:49 - INFO - transformers.file_utils -   storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json in cache at /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e\n",
            "04/15/2020 20:12:49 - INFO - transformers.file_utils -   creating metadata file for /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e\n",
            "04/15/2020 20:12:49 - INFO - filelock -   Lock 139915603172432 released on /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e.lock\n",
            "04/15/2020 20:12:49 - INFO - filelock -   Lock 139915764977336 acquired on /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2.lock\n",
            "04/15/2020 20:12:49 - INFO - transformers.file_utils -   https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmprdof9097\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "897ecef9f62e46449dc5da3677f31a51",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(IntProgress(value=0, description='Downloading', max=435779157, style=ProgressStyle(description_…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:12:56 - INFO - transformers.file_utils -   storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin in cache at /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n",
            "04/15/2020 20:12:56 - INFO - transformers.file_utils -   creating metadata file for /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n",
            "04/15/2020 20:12:56 - INFO - filelock -   Lock 139915764977336 released on /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2.lock\n",
            "04/15/2020 20:12:56 - INFO - transformers.modeling_utils -   loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin from cache at /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:12:59 - WARNING - farm.modeling.language_model -   Could not automatically detect from language model name what language it is. \n",
            "\t We guess it's an *ENGLISH* model ... \n",
            "\t If not: Init the language model by supplying the 'language' param.\n",
            "04/15/2020 20:12:59 - INFO - farm.modeling.prediction_head -   Prediction head initialized with size [768, 13]\n",
            "04/15/2020 20:13:16 - INFO - farm.modeling.optimization -   Loading optimizer `TransformersAdamW`: '{'correct_bias': False, 'weight_decay': 0.01, 'lr': 5e-05}'\n",
            "04/15/2020 20:13:16 - INFO - farm.modeling.optimization -   Using scheduler 'get_linear_schedule_with_warmup'\n",
            "04/15/2020 20:13:17 - INFO - farm.modeling.optimization -   Loading schedule `get_linear_schedule_with_warmup`: '{'num_training_steps': 440, 'num_warmup_steps': 176}'\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "LdUzNWzkhL9F",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "9d9420bb-996b-4804-9df1-131fabc0466b"
      },
      "source": [
        "\n",
        "# 7. Let it grow\n",
        "trainer.train()\n"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:13:19 - INFO - farm.train -   \n",
            " \n",
            "\n",
            "          &&& &&  & &&             _____                   _             \n",
            "      && &\\/&\\|& ()|/ @, &&       / ____|                 (_)            \n",
            "      &\\/(/&/&||/& /_/)_&/_&     | |  __ _ __ _____      ___ _ __   __ _ \n",
            "   &() &\\/&|()|/&\\/ '%\" & ()     | | |_ | '__/ _ \\ \\ /\\ / / | '_ \\ / _` |\n",
            "  &_\\_&&_\\ |& |&&/&__%_/_& &&    | |__| | | | (_) \\ V  V /| | | | | (_| |\n",
            "&&   && & &| &| /& & % ()& /&&    \\_____|_|  \\___/ \\_/\\_/ |_|_| |_|\\__, |\n",
            " ()&_---()&\\&\\|&&-&&--%---()~                                       __/ |\n",
            "     &&     \\|||                                                   |___/\n",
            "             |||\n",
            "             |||\n",
            "             |||\n",
            "       , -=-~  .-^- _\n",
            "              `\n",
            "\n",
            "Train epoch 0/2 (Cur. train loss: 0.0387): 100%|██████████| 220/220 [02:46<00:00,  1.32it/s]\n",
            "Train epoch 1/2 (Cur. train loss: 0.0323):  82%|████████▏ | 180/220 [02:15<00:28,  1.39it/s]\n",
            "Evaluating:   0%|          | 0/51 [00:00<?, ?it/s]\u001b[A\n",
            "Evaluating: 100%|██████████| 51/51 [00:18<00:00,  2.71it/s]\n",
            "04/15/2020 20:18:41 - INFO - farm.eval -   \n",
            "\n",
            "\\\\|//       \\\\|//      \\\\|//       \\\\|//     \\\\|//\n",
            "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "***************************************************\n",
            "***** EVALUATION | DEV SET | AFTER 400 BATCHES *****\n",
            "***************************************************\n",
            "\\\\|//       \\\\|//      \\\\|//       \\\\|//     \\\\|//\n",
            "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "\n",
            "04/15/2020 20:18:41 - INFO - farm.eval -   \n",
            " _________ ner _________\n",
            "04/15/2020 20:18:42 - INFO - farm.eval -   loss: 0.6635722540341891\n",
            "04/15/2020 20:18:42 - INFO - farm.eval -   task_name: ner\n",
            "04/15/2020 20:18:42 - INFO - farm.eval -   seq_f1: 0.939728779507785\n",
            "04/15/2020 20:18:42 - INFO - farm.eval -   report: \n",
            "            precision    recall  f1-score   support\n",
            "\n",
            "      LOC       0.96      0.96      0.96      1837\n",
            "     MISC       0.89      0.88      0.89       922\n",
            "      PER       0.97      0.97      0.97      1836\n",
            "      ORG       0.90      0.93      0.91      1341\n",
            "\n",
            "micro avg       0.93      0.95      0.94      5936\n",
            "macro avg       0.94      0.95      0.94      5936\n",
            "\n",
            "Train epoch 1/2 (Cur. train loss: 0.0081): 100%|██████████| 220/220 [03:05<00:00,  1.19it/s]\n",
            "Evaluating: 100%|██████████| 54/54 [00:19<00:00,  2.71it/s]\n",
            "04/15/2020 20:19:31 - INFO - farm.eval -   \n",
            "\n",
            "\\\\|//       \\\\|//      \\\\|//       \\\\|//     \\\\|//\n",
            "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "***************************************************\n",
            "***** EVALUATION | TEST SET | AFTER 440 BATCHES *****\n",
            "***************************************************\n",
            "\\\\|//       \\\\|//      \\\\|//       \\\\|//     \\\\|//\n",
            "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
            "\n",
            "04/15/2020 20:19:31 - INFO - farm.eval -   \n",
            " _________ ner _________\n",
            "04/15/2020 20:19:32 - INFO - farm.eval -   loss: 1.3053936329987164\n",
            "04/15/2020 20:19:32 - INFO - farm.eval -   task_name: ner\n",
            "04/15/2020 20:19:32 - INFO - farm.eval -   seq_f1: 0.9011739968459787\n",
            "04/15/2020 20:19:32 - INFO - farm.eval -   report: \n",
            "            precision    recall  f1-score   support\n",
            "\n",
            "      ORG       0.86      0.90      0.88      1661\n",
            "      PER       0.96      0.95      0.95      1615\n",
            "      LOC       0.93      0.92      0.93      1666\n",
            "     MISC       0.75      0.82      0.79       702\n",
            "\n",
            "micro avg       0.89      0.91      0.90      5644\n",
            "macro avg       0.90      0.91      0.90      5644\n",
            "\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "AdaptiveModel(\n",
              "  (language_model): Bert(\n",
              "    (model): BertModel(\n",
              "      (embeddings): BertEmbeddings(\n",
              "        (word_embeddings): Embedding(28996, 768, padding_idx=0)\n",
              "        (position_embeddings): Embedding(512, 768)\n",
              "        (token_type_embeddings): Embedding(2, 768)\n",
              "        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "        (dropout): Dropout(p=0.1, inplace=False)\n",
              "      )\n",
              "      (encoder): BertEncoder(\n",
              "        (layer): ModuleList(\n",
              "          (0): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (1): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (2): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (3): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (4): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (5): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (6): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (7): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (8): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (9): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (10): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "          (11): BertLayer(\n",
              "            (attention): BertAttention(\n",
              "              (self): BertSelfAttention(\n",
              "                (query): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (key): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (value): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "              (output): BertSelfOutput(\n",
              "                (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "                (dropout): Dropout(p=0.1, inplace=False)\n",
              "              )\n",
              "            )\n",
              "            (intermediate): BertIntermediate(\n",
              "              (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
              "            )\n",
              "            (output): BertOutput(\n",
              "              (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
              "              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
              "              (dropout): Dropout(p=0.1, inplace=False)\n",
              "            )\n",
              "          )\n",
              "        )\n",
              "      )\n",
              "      (pooler): BertPooler(\n",
              "        (dense): Linear(in_features=768, out_features=768, bias=True)\n",
              "        (activation): Tanh()\n",
              "      )\n",
              "    )\n",
              "  )\n",
              "  (prediction_heads): ModuleList(\n",
              "    (0): TokenClassificationHead(\n",
              "      (feed_forward): FeedForwardBlock(\n",
              "        (feed_forward): Sequential(\n",
              "          (0): Linear(in_features=768, out_features=13, bias=True)\n",
              "        )\n",
              "      )\n",
              "      (loss_fct): CrossEntropyLoss()\n",
              "    )\n",
              "  )\n",
              "  (dropout): Dropout(p=0.1, inplace=False)\n",
              ")"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 10
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "sQTxu3_1i_Tp",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "\n",
        "# 8. Hooray! You have a model. Store it:\n",
        "save_dir = \"saved_models_bert-en-ner3\"\n",
        "model.save(save_dir)\n",
        "processor.save(save_dir)\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EvBGMJK3li9k",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "outputId": "6e2fccde-a6d3-495f-c46c-4213bd3c040f"
      },
      "source": [
        "model = Inferencer.load(save_dir)\n",
        "\n",
        "# 9. Load it & harvest your fruits (Inference)\n",
        "basic_texts = [\n",
        "    {\"text\": \"I love it when I see Obama in Japan.\"},\n",
        "    {\"text\": \"Not limiting global temperature rise to 1.5 degrees would mean trillions of dollars in economic losses, heat extremes in all inhabited parts of the planet, die-off of large parts of the Amazon rainforest, and millions of climate refugees.\"},\n",
        "    {\"text\": \"August 22, 2019 Amazon’s new plastic packaging has caused outrage among customers and environmental activists who’ve branded it major step backwards.\"},\n",
        "]\n",
        "\n",
        "result = model.inference_from_dicts(dicts=basic_texts)\n",
        "print(result)\n"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "04/15/2020 20:20:12 - INFO - farm.utils -   device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None\n",
            "04/15/2020 20:20:12 - INFO - transformers.modeling_utils -   loading weights file saved_models_bert-en-ner3/language_model.bin from cache at saved_models_bert-en-ner3/language_model.bin\n",
            "04/15/2020 20:20:14 - INFO - farm.modeling.adaptive_model -   Found files for loading 1 prediction heads\n",
            "04/15/2020 20:20:14 - WARNING - farm.modeling.prediction_head -   `layer_dims` will be deprecated in future releases\n",
            "04/15/2020 20:20:14 - INFO - farm.modeling.prediction_head -   Prediction head initialized with size [768, 13]\n",
            "04/15/2020 20:20:14 - INFO - farm.modeling.prediction_head -   Loading prediction head from saved_models_bert-en-ner3/prediction_head_0.bin\n",
            "04/15/2020 20:20:14 - WARNING - farm.modeling.adaptive_model -   ML logging didn't work: INVALID_PARAMETER_VALUE: Changing param value is not allowed. Param with key='lm_name' was already logged with value='bert-base-cased' for run ID='7c43c4979601470298f408ded7556409. Attempted logging new value 'saved_models_bert-en-ner3'.\n",
            "04/15/2020 20:20:14 - INFO - transformers.tokenization_utils -   Model name 'saved_models_bert-en-ner3' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc, bert-base-german-dbmdz-cased, bert-base-german-dbmdz-uncased, bert-base-finnish-cased-v1, bert-base-finnish-uncased-v1, bert-base-dutch-cased). Assuming 'saved_models_bert-en-ner3' is a path, a model identifier, or url to a directory containing tokenizer files.\n",
            "04/15/2020 20:20:14 - INFO - transformers.tokenization_utils -   Didn't find file saved_models_bert-en-ner3/added_tokens.json. We won't load it.\n",
            "04/15/2020 20:20:14 - INFO - transformers.tokenization_utils -   loading file saved_models_bert-en-ner3/vocab.txt\n",
            "04/15/2020 20:20:14 - INFO - transformers.tokenization_utils -   loading file None\n",
            "04/15/2020 20:20:14 - INFO - transformers.tokenization_utils -   loading file saved_models_bert-en-ner3/special_tokens_map.json\n",
            "04/15/2020 20:20:14 - INFO - transformers.tokenization_utils -   loading file saved_models_bert-en-ner3/tokenizer_config.json\n",
            "04/15/2020 20:20:15 - INFO - farm.data_handler.processor -   Initialized processor without tasks. Supply `metric` and `label_list` to the constructor for using the default task or add a custom task later via processor.add_task()\n",
            "04/15/2020 20:20:15 - INFO - farm.utils -   device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None\n",
            "04/15/2020 20:20:15 - INFO - farm.infer -   Got ya 1 parallel workers to do inference on dicts (chunksize = 4)...\n",
            "04/15/2020 20:20:15 - INFO - farm.infer -    0 \n",
            "04/15/2020 20:20:15 - INFO - farm.infer -   /w\\\n",
            "04/15/2020 20:20:15 - INFO - farm.infer -   /'\\\n",
            "04/15/2020 20:20:15 - INFO - farm.infer -   \n",
            "04/15/2020 20:20:15 - WARNING - farm.data_handler.input_features -   [Task: ner] Could not convert labels to ids via label_list!\n",
            "If your are running in *inference* mode: Don't worry!\n",
            "If you are running in *training* mode: Verify you are supplying a proper label list to your processor and check that labels in input data are correct.\n",
            "04/15/2020 20:20:15 - WARNING - farm.data_handler.input_features -   [Task: ner] Could not convert labels to ids via label_list!\n",
            "If your are running in *inference* mode: Don't worry!\n",
            "If you are running in *training* mode: Verify you are supplying a proper label list to your processor and check that labels in input data are correct.\n",
            "04/15/2020 20:20:15 - WARNING - farm.data_handler.input_features -   [Task: ner] Could not convert labels to ids via label_list!\n",
            "If your are running in *inference* mode: Don't worry!\n",
            "If you are running in *training* mode: Verify you are supplying a proper label list to your processor and check that labels in input data are correct.\n",
            "04/15/2020 20:20:15 - INFO - farm.data_handler.processor -   *** Show 2 random examples ***\n",
            "04/15/2020 20:20:15 - INFO - farm.data_handler.processor -   \n",
            "\n",
            "      .--.        _____                       _      \n",
            "    .'_\\/_'.     / ____|                     | |     \n",
            "    '. /\\ .'    | (___   __ _ _ __ ___  _ __ | | ___ \n",
            "      \"||\"       \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
            "       || /\\     ____) | (_| | | | | | | |_) | |  __/\n",
            "    /\\ ||//\\)   |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
            "   (/\\||/                             |_|           \n",
            "______\\||/___________________________________________                     \n",
            "\n",
            "ID: train-2-0\n",
            "Clear Text: \n",
            " \ttext: August 22, 2019 Amazon’s new plastic packaging has caused outrage among customers and environmental activists who’ve branded it major step backwards.\n",
            "Tokenized: \n",
            " \ttokens: ['August', '22', ',', '2019', 'Amazon', '’', 's', 'new', 'plastic', 'packaging', 'has', 'caused', 'outrage', 'among', 'customers', 'and', 'environmental', 'activists', 'who', '’', 've', 'branded', 'it', 'major', 'step', 'backwards', '.']\n",
            " \toffsets: [0, 7, 9, 11, 16, 22, 23, 25, 29, 37, 47, 51, 58, 66, 72, 82, 86, 100, 110, 113, 114, 117, 125, 128, 134, 139, 148]\n",
            " \tstart_of_word: [True, True, False, True, True, False, False, True, True, True, True, True, True, True, True, True, True, True, True, False, False, True, True, True, True, True, False]\n",
            "Features: \n",
            " \tinput_ids: [101, 1360, 1659, 117, 10351, 9786, 787, 188, 1207, 5828, 17019, 1144, 2416, 22052, 1621, 5793, 1105, 4801, 10254, 1150, 787, 1396, 11450, 1122, 1558, 2585, 11316, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tinitial_mask: [0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "_____________________________________________________\n",
            "04/15/2020 20:20:15 - INFO - farm.data_handler.processor -   \n",
            "\n",
            "      .--.        _____                       _      \n",
            "    .'_\\/_'.     / ____|                     | |     \n",
            "    '. /\\ .'    | (___   __ _ _ __ ___  _ __ | | ___ \n",
            "      \"||\"       \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
            "       || /\\     ____) | (_| | | | | | | |_) | |  __/\n",
            "    /\\ ||//\\)   |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
            "   (/\\||/                             |_|           \n",
            "______\\||/___________________________________________                     \n",
            "\n",
            "ID: train-0-0\n",
            "Clear Text: \n",
            " \ttext: I love it when I see Obama in Japan.\n",
            "Tokenized: \n",
            " \ttokens: ['I', 'love', 'it', 'when', 'I', 'see', 'Obama', 'in', 'Japan', '.']\n",
            " \toffsets: [0, 2, 7, 10, 15, 17, 21, 27, 30, 35]\n",
            " \tstart_of_word: [True, True, True, True, True, True, True, True, True, False]\n",
            "Features: \n",
            " \tinput_ids: [101, 146, 1567, 1122, 1165, 146, 1267, 7661, 1107, 1999, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            " \tinitial_mask: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
            "_____________________________________________________\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            "[{'task': 'ner', 'predictions': [{'start': 21, 'end': 26, 'context': 'Obama', 'label': 'PER', 'probability': 0.99587303}, {'start': 30, 'end': 36, 'context': 'Japan.', 'label': 'LOC', 'probability': 0.9992192}, {'start': 186, 'end': 192, 'context': 'Amazon', 'label': 'LOC', 'probability': 0.99963367}, {'start': 16, 'end': 24, 'context': 'Amazon’s', 'label': 'ORG', 'probability': 0.99936336}]}]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6kiNl6BZlZwt",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "print(result)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dGFLE2z9l6qy",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}