Skip to content

Instantly share code, notes, and snippets.

@renaud
Last active April 15, 2020 20:22
Show Gist options
  • Save renaud/2317b0c8e6d4ced7abd8088f5594c547 to your computer and use it in GitHub Desktop.
Save renaud/2317b0c8e6d4ced7abd8088f5594c547 to your computer and use it in GitHub Desktop.
FARM_en_NER.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "FARM_en_NER.ipynb",
"provenance": [],
"collapsed_sections": [],
"mount_file_id": "1qqewywPJDcrT4vTNBbkEJGO6ZUHievNO",
"authorship_tag": "ABX9TyNUVWmo35Cs8dOpEfoZBRt6",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"1b39397a3b7c4f46af0f78c302af8b73": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_1954a748853b45018b25be2dccca4e80",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_c60b9896692f41d0ac6f25bf6aa4ea24",
"IPY_MODEL_9855be6e35d047b1806ca63798ef123e"
]
}
},
"1954a748853b45018b25be2dccca4e80": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"c60b9896692f41d0ac6f25bf6aa4ea24": {
"model_module": "@jupyter-widgets/controls",
"model_name": "IntProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_7c9049195247414fb19297ab266e1539",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "IntProgressModel",
"bar_style": "success",
"max": 213450,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 213450,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_7d7eb476372e4a3bb7f47b25431a6150"
}
},
"9855be6e35d047b1806ca63798ef123e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_bbbd1a43688549968fd2ce8a924e403b",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 213k/213k [00:00<00:00, 1.80MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_1d0391e4ee324d25a8cf93f4758fd62f"
}
},
"7c9049195247414fb19297ab266e1539": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"7d7eb476372e4a3bb7f47b25431a6150": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"bbbd1a43688549968fd2ce8a924e403b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"1d0391e4ee324d25a8cf93f4758fd62f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"aacfc007c1f5457c8f54b30ec66863b1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_40ff435523b04903aad8e65507a662d3",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_2ce03980b8eb47e3bc4def1a25f9f150",
"IPY_MODEL_3401c4064ff449ad9b7a785f85629ec1"
]
}
},
"40ff435523b04903aad8e65507a662d3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"2ce03980b8eb47e3bc4def1a25f9f150": {
"model_module": "@jupyter-widgets/controls",
"model_name": "IntProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_0b3502a0bc304f51a13df06516697210",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "IntProgressModel",
"bar_style": "success",
"max": 361,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 361,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_bc38e1ba9e1e47d0846c62ab55042118"
}
},
"3401c4064ff449ad9b7a785f85629ec1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_58a7a998b3bd48b8b273fca4a7d628aa",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 361/361 [00:00<00:00, 2.83kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_de3726de50c84a459b04df3a2e2710bd"
}
},
"0b3502a0bc304f51a13df06516697210": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"bc38e1ba9e1e47d0846c62ab55042118": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"58a7a998b3bd48b8b273fca4a7d628aa": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"de3726de50c84a459b04df3a2e2710bd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"897ecef9f62e46449dc5da3677f31a51": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_30ce00d9d7254e42a75f6eb1bd3aea56",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_6e04f43cc22143a49d165fa78cdebf1d",
"IPY_MODEL_34078f982a4a49fb87d858d6b926d26c"
]
}
},
"30ce00d9d7254e42a75f6eb1bd3aea56": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"6e04f43cc22143a49d165fa78cdebf1d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "IntProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_ee65e576985f4b9f975805df04c2e56a",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "IntProgressModel",
"bar_style": "success",
"max": 435779157,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 435779157,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_13d9851cad514f188a2281b5f81827e0"
}
},
"34078f982a4a49fb87d858d6b926d26c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_ef25ad38764f424284acc290ab4208e1",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 436M/436M [00:07<00:00, 58.3MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_17c4e90625074475a3cba8ad8f50a527"
}
},
"ee65e576985f4b9f975805df04c2e56a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"13d9851cad514f188a2281b5f81827e0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"ef25ad38764f424284acc290ab4208e1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"17c4e90625074475a3cba8ad8f50a527": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/renaud/2317b0c8e6d4ced7abd8088f5594c547/farm_en_ner.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "xyO45RHBgjI0",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 139
},
"outputId": "1565a4ae-7f82-4d8c-978a-1a5c0c2edf66"
},
"source": [
"# https://github.com/deepset-ai/FARM#basic-usage\n",
"!git clone https://github.com/deepset-ai/FARM.git"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"Cloning into 'FARM'...\n",
"remote: Enumerating objects: 85, done.\u001b[K\n",
"remote: Counting objects: 100% (85/85), done.\u001b[K\n",
"remote: Compressing objects: 100% (71/71), done.\u001b[K\n",
"remote: Total 4933 (delta 47), reused 32 (delta 14), pack-reused 4848\u001b[K\n",
"Receiving objects: 100% (4933/4933), 64.95 MiB | 24.13 MiB/s, done.\n",
"Resolving deltas: 100% (3668/3668), done.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "qXzI39Jd1SsY",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "18bd6ed9-4ccd-4114-8804-80a05843b162"
},
"source": [
"cd FARM\n"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"/content/FARM\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "eunTBTOW1tBb",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "7db64161-8e55-45f1-ebc2-a93f48ed548d"
},
"source": [
"!pip install -r requirements.txt\n",
"!pip install --editable .\n"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 2)) (46.1.3)\n",
"Requirement already satisfied: wheel in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 3)) (0.34.2)\n",
"Requirement already satisfied: torch>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 5)) (1.4.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 7)) (4.38.0)\n",
"Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 9)) (1.12.38)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 11)) (2.21.0)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 13)) (1.4.1)\n",
"Requirement already satisfied: sklearn in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 14)) (0.0)\n",
"Collecting seqeval\n",
" Downloading https://files.pythonhosted.org/packages/34/91/068aca8d60ce56dd9ba4506850e876aba5e66a6f2f29aa223224b50df0de/seqeval-0.0.12.tar.gz\n",
"Collecting mlflow==1.0.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/01/ec/8c9448968d4662e8354b9c3a62e635f8929ed507a45af3d9fdb84be51270/mlflow-1.0.0-py3-none-any.whl (47.7MB)\n",
"\u001b[K |████████████████████████████████| 47.7MB 61kB/s \n",
"\u001b[?25hCollecting transformers==2.7.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/37/ba/dda44bbf35b071441635708a3dd568a5ca6bf29f77389f7c7c6818ae9498/transformers-2.7.0-py3-none-any.whl (544kB)\n",
"\u001b[K |████████████████████████████████| 552kB 44.7MB/s \n",
"\u001b[?25hCollecting dotmap==1.3.0\n",
" Downloading https://files.pythonhosted.org/packages/fa/eb/ee5f0358a9e0ede90308d8f34e697e122f191c2702dc4f614eca7770b1eb/dotmap-1.3.0-py3-none-any.whl\n",
"Collecting Werkzeug==0.16.1\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/c2/e4/a859d2fe516f466642fa5c6054fd9646271f9da26b0cac0d2f37fc858c8f/Werkzeug-0.16.1-py2.py3-none-any.whl (327kB)\n",
"\u001b[K |████████████████████████████████| 327kB 33.7MB/s \n",
"\u001b[?25hRequirement already satisfied: flask in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 24)) (1.1.2)\n",
"Collecting flask-restplus\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/c2/a6/b17c848771f96ad039ad9e3ea275e842a16c39c4f3eb9f60ee330b20b6c2/flask_restplus-0.13.0-py2.py3-none-any.whl (2.5MB)\n",
"\u001b[K |████████████████████████████████| 2.5MB 46.2MB/s \n",
"\u001b[?25hCollecting flask-cors\n",
" Downloading https://files.pythonhosted.org/packages/78/38/e68b11daa5d613e3a91e4bf3da76c94ac9ee0d9cd515af9c1ab80d36f709/Flask_Cors-3.0.8-py2.py3-none-any.whl\n",
"Requirement already satisfied: dill in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 29)) (0.3.1.1)\n",
"Collecting onnxruntime\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/69/39/404df5ee608c548dacde43a17faf0248b183fa6163cf9c06aca6a511d760/onnxruntime-1.2.0-cp36-cp36m-manylinux1_x86_64.whl (3.7MB)\n",
"\u001b[K |████████████████████████████████| 3.7MB 47.1MB/s \n",
"\u001b[?25hRequirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->-r requirements.txt (line 9)) (0.3.3)\n",
"Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->-r requirements.txt (line 9)) (0.9.5)\n",
"Requirement already satisfied: botocore<1.16.0,>=1.15.38 in /usr/local/lib/python3.6/dist-packages (from boto3->-r requirements.txt (line 9)) (1.15.38)\n",
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (1.24.3)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (2.8)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (3.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (2020.4.5.1)\n",
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from scipy>=1.3.2->-r requirements.txt (line 13)) (1.18.2)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from sklearn->-r requirements.txt (line 14)) (0.22.2.post1)\n",
"Requirement already satisfied: Keras>=2.2.4 in /usr/local/lib/python3.6/dist-packages (from seqeval->-r requirements.txt (line 16)) (2.3.1)\n",
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (2.8.1)\n",
"Collecting simplejson\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/98/87/a7b98aa9256c8843f92878966dc3d8d914c14aad97e2c5ce4798d5743e07/simplejson-3.17.0.tar.gz (83kB)\n",
"\u001b[K |████████████████████████████████| 92kB 11.2MB/s \n",
"\u001b[?25hRequirement already satisfied: sqlparse in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (0.3.1)\n",
"Collecting querystring-parser\n",
" Downloading https://files.pythonhosted.org/packages/4a/fa/f54f5662e0eababf0c49e92fd94bf178888562c0e7b677c8941bbbcd1bd6/querystring_parser-1.2.4.tar.gz\n",
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.12.0)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (3.13)\n",
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (0.3)\n",
"Collecting docker>=3.6.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/58/74/379a9d30b1620def158c40b88c43e01c1936a287ebb97afab0699c601c57/docker-4.2.0-py2.py3-none-any.whl (143kB)\n",
"\u001b[K |████████████████████████████████| 153kB 43.3MB/s \n",
"\u001b[?25hCollecting alembic\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/60/1e/cabc75a189de0fbb2841d0975243e59bde8b7822bacbb95008ac6fe9ad47/alembic-1.4.2.tar.gz (1.1MB)\n",
"\u001b[K |████████████████████████████████| 1.1MB 43.5MB/s \n",
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (7.1.1)\n",
"Collecting databricks-cli>=0.8.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/49/d1/fe0ba3d5c2b4b76ec035aa243bbc2fd0d60607a391f192ebe1656e17a4e2/databricks-cli-0.10.0.tar.gz (45kB)\n",
"\u001b[K |████████████████████████████████| 51kB 6.9MB/s \n",
"\u001b[?25hRequirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (3.10.0)\n",
"Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.3.16)\n",
"Requirement already satisfied: cloudpickle in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.3.0)\n",
"Collecting gunicorn\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/69/ca/926f7cd3a2014b16870086b2d0fdc84a9e49473c68a8dff8b57f7c156f43/gunicorn-20.0.4-py2.py3-none-any.whl (77kB)\n",
"\u001b[K |████████████████████████████████| 81kB 9.5MB/s \n",
"\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.0.3)\n",
"Collecting gitpython>=2.1.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/19/1a/0df85d2bddbca33665d2148173d3281b290ac054b5f50163ea735740ac7b/GitPython-3.1.1-py3-none-any.whl (450kB)\n",
"\u001b[K |████████████████████████████████| 460kB 42.3MB/s \n",
"\u001b[?25hCollecting sacremoses\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/99/50/93509f906a40bffd7d175f97fd75ea328ad9bd91f48f59c4bd084c94a25e/sacremoses-0.0.41.tar.gz (883kB)\n",
"\u001b[K |████████████████████████████████| 890kB 38.4MB/s \n",
"\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->-r requirements.txt (line 19)) (2019.12.20)\n",
"Collecting tokenizers==0.5.2\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/d1/3f/73c881ea4723e43c1e9acf317cf407fab3a278daab3a69c98dcac511c04f/tokenizers-0.5.2-cp36-cp36m-manylinux1_x86_64.whl (3.7MB)\n",
"\u001b[K |████████████████████████████████| 3.7MB 41.4MB/s \n",
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->-r requirements.txt (line 19)) (3.0.12)\n",
"Collecting sentencepiece\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/f4/2d5214cbf13d06e7cb2c20d84115ca25b53ea76fa1f0ade0e3c9749de214/sentencepiece-0.1.85-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n",
"\u001b[K |████████████████████████████████| 1.0MB 35.2MB/s \n",
"\u001b[?25hRequirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->-r requirements.txt (line 19)) (0.7)\n",
"Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from flask->-r requirements.txt (line 24)) (1.1.0)\n",
"Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from flask->-r requirements.txt (line 24)) (2.11.1)\n",
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.6/dist-packages (from flask-restplus->-r requirements.txt (line 25)) (2.6.0)\n",
"Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from flask-restplus->-r requirements.txt (line 25)) (2018.9)\n",
"Collecting aniso8601>=0.82\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/eb/e4/787e104b58eadc1a710738d4e418d7e599e4e778e52cb8e5d5ef6ddd5833/aniso8601-8.0.0-py2.py3-none-any.whl (43kB)\n",
"\u001b[K |████████████████████████████████| 51kB 7.5MB/s \n",
"\u001b[?25hCollecting onnx>=1.2.3\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/f5/f4/e126b60d109ad1e80020071484b935980b7cce1e4796073aab086a2d6902/onnx-1.6.0-cp36-cp36m-manylinux1_x86_64.whl (4.8MB)\n",
"\u001b[K |████████████████████████████████| 4.8MB 34.7MB/s \n",
"\u001b[?25hRequirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.38->boto3->-r requirements.txt (line 9)) (0.15.2)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->sklearn->-r requirements.txt (line 14)) (0.14.1)\n",
"Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->-r requirements.txt (line 16)) (1.0.8)\n",
"Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->-r requirements.txt (line 16)) (2.10.0)\n",
"Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->-r requirements.txt (line 16)) (1.1.0)\n",
"Collecting websocket-client>=0.32.0\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/4c/5f/f61b420143ed1c8dc69f9eaec5ff1ac36109d52c80de49d66e0c36c3dfdf/websocket_client-0.57.0-py2.py3-none-any.whl (200kB)\n",
"\u001b[K |████████████████████████████████| 204kB 49.1MB/s \n",
"\u001b[?25hCollecting python-editor>=0.3\n",
" Downloading https://files.pythonhosted.org/packages/c6/d3/201fc3abe391bbae6606e6f1d598c15d367033332bd54352b12f35513717/python_editor-1.0.4-py3-none-any.whl\n",
"Collecting Mako\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/50/78/f6ade1e18aebda570eed33b7c534378d9659351cadce2fcbc7b31be5f615/Mako-1.1.2-py2.py3-none-any.whl (75kB)\n",
"\u001b[K |████████████████████████████████| 81kB 9.8MB/s \n",
"\u001b[?25hRequirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.0->mlflow==1.0.0->-r requirements.txt (line 17)) (0.8.7)\n",
"Collecting configparser>=0.3.5\n",
" Downloading https://files.pythonhosted.org/packages/4b/6b/01baa293090240cf0562cc5eccb69c6f5006282127f2b846fad011305c79/configparser-5.0.0-py3-none-any.whl\n",
"Collecting gitdb<5,>=4.0.1\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/52/ca35448b56c53a079d3ffe18b1978c6e424f6d4df02404877094c89f5bfb/gitdb-4.0.4-py3-none-any.whl (63kB)\n",
"\u001b[K |████████████████████████████████| 71kB 10.2MB/s \n",
"\u001b[?25hRequirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->flask->-r requirements.txt (line 24)) (1.1.1)\n",
"Requirement already satisfied: typing-extensions>=3.6.2.1 in /usr/local/lib/python3.6/dist-packages (from onnx>=1.2.3->onnxruntime->-r requirements.txt (line 30)) (3.6.6)\n",
"Collecting smmap<4,>=3.0.1\n",
" Downloading https://files.pythonhosted.org/packages/27/b1/e379cfb7c07bbf8faee29c4a1a2469dbea525f047c2b454c4afdefa20a30/smmap-3.0.2-py2.py3-none-any.whl\n",
"Building wheels for collected packages: alembic\n",
" Building wheel for alembic (PEP 517) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for alembic: filename=alembic-1.4.2-cp36-none-any.whl size=159543 sha256=0891797d8c0abd6a76807cafacf33ef9e161a7f2b83f9a95724a3803bc2c7a37\n",
" Stored in directory: /root/.cache/pip/wheels/1f/04/83/76023f7a4c14688c0b5c2682a96392cfdd3ee4449eaaa287ef\n",
"Successfully built alembic\n",
"Building wheels for collected packages: seqeval, simplejson, querystring-parser, databricks-cli, sacremoses\n",
" Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for seqeval: filename=seqeval-0.0.12-cp36-none-any.whl size=7424 sha256=b40eb771b23d613b9bd479540b94e508183d8fce88457fc51cd63f885c644a83\n",
" Stored in directory: /root/.cache/pip/wheels/4f/32/0a/df3b340a82583566975377d65e724895b3fad101a3fb729f68\n",
" Building wheel for simplejson (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for simplejson: filename=simplejson-3.17.0-cp36-cp36m-linux_x86_64.whl size=114206 sha256=b916793fe2b66118b83b28018acf3970123a30be008b1712d502dcbc45e65d7b\n",
" Stored in directory: /root/.cache/pip/wheels/86/c0/83/dcd0339abb2640544bb8e0938aab2d069cef55e5647ce6e097\n",
" Building wheel for querystring-parser (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for querystring-parser: filename=querystring_parser-1.2.4-cp36-none-any.whl size=7079 sha256=e3706658175beab84332d9595bb8412240ba680bf5cd35d8b4c2d22bd93ea44a\n",
" Stored in directory: /root/.cache/pip/wheels/1e/41/34/23ebf5d1089a9aed847951e0ee375426eb4ad0a7079d88d41e\n",
" Building wheel for databricks-cli (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for databricks-cli: filename=databricks_cli-0.10.0-cp36-none-any.whl size=84285 sha256=f5cf7160929d401610bdbabc1b61f3596e26f43d90605c83955e8685ff2b19ad\n",
" Stored in directory: /root/.cache/pip/wheels/1e/e5/2d/a19c0bfd38005176063f130d72de17cb3d2d32c0ee384e7493\n",
" Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for sacremoses: filename=sacremoses-0.0.41-cp36-none-any.whl size=893334 sha256=4847c63d318afc68ec3491f90c2538cec9a07f8b35f525ad57f31c208e38b215\n",
" Stored in directory: /root/.cache/pip/wheels/22/5a/d4/b020a81249de7dc63758a34222feaa668dbe8ebfe9170cc9b1\n",
"Successfully built seqeval simplejson querystring-parser databricks-cli sacremoses\n",
"Installing collected packages: seqeval, simplejson, querystring-parser, websocket-client, docker, python-editor, Mako, alembic, configparser, databricks-cli, gunicorn, smmap, gitdb, gitpython, mlflow, sacremoses, tokenizers, sentencepiece, transformers, dotmap, Werkzeug, aniso8601, flask-restplus, flask-cors, onnx, onnxruntime\n",
" Found existing installation: Werkzeug 1.0.1\n",
" Uninstalling Werkzeug-1.0.1:\n",
" Successfully uninstalled Werkzeug-1.0.1\n",
"Successfully installed Mako-1.1.2 Werkzeug-0.16.1 alembic-1.4.2 aniso8601-8.0.0 configparser-5.0.0 databricks-cli-0.10.0 docker-4.2.0 dotmap-1.3.0 flask-cors-3.0.8 flask-restplus-0.13.0 gitdb-4.0.4 gitpython-3.1.1 gunicorn-20.0.4 mlflow-1.0.0 onnx-1.6.0 onnxruntime-1.2.0 python-editor-1.0.4 querystring-parser-1.2.4 sacremoses-0.0.41 sentencepiece-0.1.85 seqeval-0.0.12 simplejson-3.17.0 smmap-3.0.2 tokenizers-0.5.2 transformers-2.7.0 websocket-client-0.57.0\n",
"Obtaining file:///content/FARM\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (46.1.3)\n",
"Requirement already satisfied: wheel in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.34.2)\n",
"Requirement already satisfied: torch>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.4.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (4.38.0)\n",
"Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.12.38)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (2.21.0)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.4.1)\n",
"Requirement already satisfied: sklearn in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.0)\n",
"Requirement already satisfied: seqeval in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.0.12)\n",
"Requirement already satisfied: mlflow==1.0.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.0.0)\n",
"Requirement already satisfied: transformers==2.7.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (2.7.0)\n",
"Requirement already satisfied: dotmap==1.3.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.3.0)\n",
"Requirement already satisfied: Werkzeug==0.16.1 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.16.1)\n",
"Requirement already satisfied: flask in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.1.2)\n",
"Requirement already satisfied: flask-restplus in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.13.0)\n",
"Requirement already satisfied: flask-cors in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (3.0.8)\n",
"Requirement already satisfied: dill in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.3.1.1)\n",
"Requirement already satisfied: onnxruntime in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.2.0)\n",
"Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->farm==0.4.2) (0.9.5)\n",
"Requirement already satisfied: botocore<1.16.0,>=1.15.38 in /usr/local/lib/python3.6/dist-packages (from boto3->farm==0.4.2) (1.15.38)\n",
"Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->farm==0.4.2) (0.3.3)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (2.8)\n",
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (1.24.3)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (3.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (2020.4.5.1)\n",
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from scipy>=1.3.2->farm==0.4.2) (1.18.2)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from sklearn->farm==0.4.2) (0.22.2.post1)\n",
"Requirement already satisfied: Keras>=2.2.4 in /usr/local/lib/python3.6/dist-packages (from seqeval->farm==0.4.2) (2.3.1)\n",
"Requirement already satisfied: cloudpickle in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.3.0)\n",
"Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.10.0)\n",
"Requirement already satisfied: databricks-cli>=0.8.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (0.10.0)\n",
"Requirement already satisfied: simplejson in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.17.0)\n",
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.12.0)\n",
"Requirement already satisfied: gunicorn in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (20.0.4)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.13)\n",
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (0.3)\n",
"Requirement already satisfied: sqlparse in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (0.3.1)\n",
"Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.3.16)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.0.3)\n",
"Requirement already satisfied: gitpython>=2.1.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.1.1)\n",
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (7.1.1)\n",
"Requirement already satisfied: docker>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (4.2.0)\n",
"Requirement already satisfied: alembic in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.4.2)\n",
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (2.8.1)\n",
"Requirement already satisfied: querystring-parser in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.2.4)\n",
"Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.7)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (3.0.12)\n",
"Requirement already satisfied: tokenizers==0.5.2 in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.5.2)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (2019.12.20)\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.1.85)\n",
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.0.41)\n",
"Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from flask->farm==0.4.2) (2.11.1)\n",
"Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from flask->farm==0.4.2) (1.1.0)\n",
"Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from flask-restplus->farm==0.4.2) (2018.9)\n",
"Requirement already satisfied: aniso8601>=0.82 in /usr/local/lib/python3.6/dist-packages (from flask-restplus->farm==0.4.2) (8.0.0)\n",
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.6/dist-packages (from flask-restplus->farm==0.4.2) (2.6.0)\n",
"Requirement already satisfied: onnx>=1.2.3 in /usr/local/lib/python3.6/dist-packages (from onnxruntime->farm==0.4.2) (1.6.0)\n",
"Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.38->boto3->farm==0.4.2) (0.15.2)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->sklearn->farm==0.4.2) (0.14.1)\n",
"Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->farm==0.4.2) (2.10.0)\n",
"Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->farm==0.4.2) (1.1.0)\n",
"Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->farm==0.4.2) (1.0.8)\n",
"Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.0->mlflow==1.0.0->farm==0.4.2) (0.8.7)\n",
"Requirement already satisfied: configparser>=0.3.5 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.0->mlflow==1.0.0->farm==0.4.2) (5.0.0)\n",
"Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.6/dist-packages (from gitpython>=2.1.0->mlflow==1.0.0->farm==0.4.2) (4.0.4)\n",
"Requirement already satisfied: websocket-client>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from docker>=3.6.0->mlflow==1.0.0->farm==0.4.2) (0.57.0)\n",
"Requirement already satisfied: Mako in /usr/local/lib/python3.6/dist-packages (from alembic->mlflow==1.0.0->farm==0.4.2) (1.1.2)\n",
"Requirement already satisfied: python-editor>=0.3 in /usr/local/lib/python3.6/dist-packages (from alembic->mlflow==1.0.0->farm==0.4.2) (1.0.4)\n",
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->flask->farm==0.4.2) (1.1.1)\n",
"Requirement already satisfied: typing-extensions>=3.6.2.1 in /usr/local/lib/python3.6/dist-packages (from onnx>=1.2.3->onnxruntime->farm==0.4.2) (3.6.6)\n",
"Requirement already satisfied: smmap<4,>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from gitdb<5,>=4.0.1->gitpython>=2.1.0->mlflow==1.0.0->farm==0.4.2) (3.0.2)\n",
"Installing collected packages: farm\n",
" Running setup.py develop for farm\n",
"Successfully installed farm\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "kBzm4YkZg6xl",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 364
},
"outputId": "4040f151-0e2e-4ead-b6f2-39fe7bdd56b1"
},
"source": [
"import logging\n",
"from pathlib import Path\n",
"\n",
"from farm.data_handler.data_silo import DataSilo\n",
"from farm.data_handler.processor import NERProcessor\n",
"from farm.modeling.optimization import initialize_optimizer\n",
"from farm.infer import Inferencer\n",
"from farm.modeling.adaptive_model import AdaptiveModel\n",
"from farm.modeling.language_model import LanguageModel\n",
"from farm.modeling.prediction_head import TokenClassificationHead\n",
"from farm.modeling.tokenization import Tokenizer\n",
"from farm.train import Trainer\n",
"from farm.utils import set_all_seeds, MLFlowLogger, initialize_device_settings\n",
"\n",
"logging.basicConfig(\n",
" format=\"%(asctime)s - %(levelname)s - %(name)s - %(message)s\",\n",
" datefmt=\"%m/%d/%Y %H:%M:%S\",\n",
" level=logging.INFO,\n",
")\n",
"\n",
"ml_logger = MLFlowLogger(tracking_uri=\"https://public-mlflow.deepset.ai/\")\n",
"ml_logger.init_experiment(experiment_name=\"Public_FARM\", run_name=\"Ren_en-ner-colb2\")\n"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"text": [
"04/15/2020 20:11:05 - INFO - transformers.file_utils - PyTorch version 1.4.0 available.\n",
"04/15/2020 20:11:06 - INFO - transformers.file_utils - TensorFlow version 2.2.0-rc2 available.\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"\n",
" __ __ _ _ \n",
" \\ \\ / / | | | | \n",
" \\ \\ /\\ / /__| | ___ ___ _ __ ___ ___ | |_ ___ \n",
" \\ \\/ \\/ / _ \\ |/ __/ _ \\| '_ ` _ \\ / _ \\ | __/ _ \\ \n",
" \\ /\\ / __/ | (_| (_) | | | | | | __/ | || (_) |\n",
" \\/ \\/ \\___|_|\\___\\___/|_| |_| |_|\\___| \\__\\___/ \n",
" ______ _____ __ __ \n",
" | ____/\\ | __ \\| \\/ | _.-^-._ .--.\n",
" | |__ / \\ | |__) | \\ / | .-' _ '-. |__|\n",
" | __/ /\\ \\ | _ /| |\\/| | / |_| \\| |\n",
" | | / ____ \\| | \\ \\| | | | / \\ |\n",
" |_|/_/ \\_\\_| \\_\\_| |_| /| _____ |\\ |\n",
" | |==|==| | |\n",
"|---||---|---|---|---|---|---|---|---| |--|--| | |\n",
"|---||---|---|---|---|---|---|---|---| |==|==| | |\n",
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" \n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "nQqRPoiahHTE",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000,
"referenced_widgets": [
"1b39397a3b7c4f46af0f78c302af8b73",
"1954a748853b45018b25be2dccca4e80",
"c60b9896692f41d0ac6f25bf6aa4ea24",
"9855be6e35d047b1806ca63798ef123e",
"7c9049195247414fb19297ab266e1539",
"7d7eb476372e4a3bb7f47b25431a6150",
"bbbd1a43688549968fd2ce8a924e403b",
"1d0391e4ee324d25a8cf93f4758fd62f"
]
},
"outputId": "31a5af11-0846-4499-b9c9-b4de618877ea"
},
"source": [
"##########################\n",
"########## Settings\n",
"##########################\n",
"set_all_seeds(seed=42)\n",
"device, n_gpu = initialize_device_settings(use_cuda=True)#, local_rank=-1, use_amp=None)\n",
"n_epochs = 2\n",
"# TODO gradient_accumulation_steps?\n",
"# TODO layer_dims?\n",
"warmup_proportion = 0.4\n",
"batch_size = 64\n",
"evaluate_every = 400\n",
"lang_model = \"bert-base-cased\"\n",
"do_lower_case = False\n",
"\n",
"# 1.Create a tokenizer\n",
"tokenizer = Tokenizer.load(\n",
" pretrained_model_name_or_path=lang_model, do_lower_case=do_lower_case\n",
")\n",
"\n",
"# 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset\n",
"ner_labels = [\"[PAD]\", \"X\", \"O\", \"B-MISC\", \"I-MISC\", \"B-PER\", \"I-PER\", \"B-ORG\", \"I-ORG\", \"B-LOC\", \"I-LOC\", \"B-OTH\", \"I-OTH\"]\n",
"\n",
"processor = NERProcessor(\n",
" tokenizer=tokenizer, max_seq_len=128, data_dir=Path(\"../data/conll03-en\"), delimiter=\" \", metric=\"seq_f1\", label_list=ner_labels\n",
")\n",
"\n",
"# 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them and calculates a few descriptive statistics of our datasets\n",
"data_silo = DataSilo(processor=processor, batch_size=batch_size)\n"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": [
"04/15/2020 20:11:30 - INFO - farm.utils - device: cuda n_gpu: 1, distributed training: False, automatic mixed precision training: None\n",
"04/15/2020 20:11:30 - INFO - farm.modeling.tokenization - Loading tokenizer of type 'BertTokenizer'\n",
"04/15/2020 20:11:30 - INFO - filelock - Lock 139915751362176 acquired on /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1.lock\n",
"04/15/2020 20:11:30 - INFO - transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp0l3ymdpt\n"
],
"name": "stderr"
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1b39397a3b7c4f46af0f78c302af8b73",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Downloading', max=213450, style=ProgressStyle(description_wid…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"04/15/2020 20:11:30 - INFO - transformers.file_utils - storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt in cache at /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n",
"04/15/2020 20:11:30 - INFO - transformers.file_utils - creating metadata file for /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n",
"04/15/2020 20:11:30 - INFO - filelock - Lock 139915751362176 released on /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1.lock\n",
"04/15/2020 20:11:30 - INFO - transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"04/15/2020 20:11:31 - INFO - farm.data_handler.data_silo - \n",
"Loading data into the data silo ... \n",
" ______\n",
" |o | !\n",
" __ |:`_|---'-.\n",
" |__|______.-/ _ \\-----.| \n",
" (o)(o)------'\\ _ / ( ) \n",
" \n",
"04/15/2020 20:11:31 - INFO - farm.data_handler.data_silo - Loading train set from: ../data/conll03-en/train.txt \n",
"04/15/2020 20:11:31 - INFO - farm.data_handler.utils - Couldn't find ../data/conll03-en/train.txt locally. Trying to download ...\n",
"04/15/2020 20:11:31 - INFO - farm.data_handler.utils - downloading and extracting file conll03-en to dir /content/data\n",
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - Got ya 1 parallel workers to convert 14041 dictionaries to pytorch datasets (chunksize = 2000)...\n",
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - 0 \n",
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - /w\\\n",
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - /'\\\n",
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - \n",
"Preprocessing Dataset ../data/conll03-en/train.txt: 0%| | 0/14041 [00:00<?, ? Dicts/s]04/15/2020 20:11:34 - INFO - farm.data_handler.processor - *** Show 2 random examples ***\n",
"04/15/2020 20:11:34 - INFO - farm.data_handler.processor - \n",
"\n",
" .--. _____ _ \n",
" .'_\\/_'. / ____| | | \n",
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n",
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
" || /\\ ____) | (_| | | | | | | |_) | | __/\n",
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
" (/\\||/ |_| \n",
"______\\||/___________________________________________ \n",
"\n",
"ID: train-1283-0\n",
"Clear Text: \n",
" \ttext: ALKHAN-YURT , Russia 1996-08-22\n",
" \tner_label: ['B-LOC', 'O', 'B-LOC', 'O']\n",
"Tokenized: \n",
" \ttokens: ['AL', '##K', '##HA', '##N', '-', 'Y', '##UR', '##T', ',', 'Russia', '1996', '-', '08', '-', '22']\n",
" \toffsets: [0, 2, 3, 5, 6, 7, 8, 10, 12, 14, 21, 25, 26, 28, 29]\n",
" \tstart_of_word: [True, False, False, False, False, False, False, False, True, True, True, False, False, False, False]\n",
"Features: \n",
" \tinput_ids: [101, 18589, 2428, 11612, 2249, 118, 162, 19556, 1942, 117, 2733, 1820, 118, 4775, 118, 1659, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tinitial_mask: [0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tner_label_ids: [1, 9, 1, 1, 1, 1, 1, 1, 1, 2, 9, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"_____________________________________________________\n",
"04/15/2020 20:11:34 - INFO - farm.data_handler.processor - \n",
"\n",
" .--. _____ _ \n",
" .'_\\/_'. / ____| | | \n",
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n",
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
" || /\\ ____) | (_| | | | | | | |_) | | __/\n",
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
" (/\\||/ |_| \n",
"______\\||/___________________________________________ \n",
"\n",
"ID: train-252-0\n",
"Clear Text: \n",
" \ttext: Software Revenue 2,383 1,558 1,086 1,074\n",
" \tner_label: ['O', 'O', 'O', 'O', 'O', 'O']\n",
"Tokenized: \n",
" \ttokens: ['Software', 'Revenue', '2', ',', '38', '##3', '1', ',', '55', '##8', '1', ',', '08', '##6', '1', ',', '07', '##4']\n",
" \toffsets: [0, 9, 17, 18, 19, 21, 23, 24, 25, 27, 29, 30, 31, 33, 35, 36, 37, 39]\n",
" \tstart_of_word: [True, True, True, False, False, False, True, False, False, False, True, False, False, False, True, False, False, False]\n",
"Features: \n",
" \tinput_ids: [101, 10331, 16944, 123, 117, 3383, 1495, 122, 117, 3731, 1604, 122, 117, 4775, 1545, 122, 117, 5004, 1527, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tinitial_mask: [0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tner_label_ids: [1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"_____________________________________________________\n",
"Preprocessing Dataset ../data/conll03-en/train.txt: 100%|██████████| 14041/14041 [00:13<00:00, 1058.31 Dicts/s]\n",
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - Loading dev set from: ../data/conll03-en/dev.txt\n",
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - Got ya 1 parallel workers to convert 3250 dictionaries to pytorch datasets (chunksize = 650)...\n",
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - 0 \n",
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - /w\\\n",
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - / \\\n",
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - \n",
"Preprocessing Dataset ../data/conll03-en/dev.txt: 0%| | 0/3250 [00:00<?, ? Dicts/s]04/15/2020 20:11:46 - INFO - farm.data_handler.processor - *** Show 2 random examples ***\n",
"04/15/2020 20:11:46 - INFO - farm.data_handler.processor - \n",
"\n",
" .--. _____ _ \n",
" .'_\\/_'. / ____| | | \n",
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n",
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
" || /\\ ____) | (_| | | | | | | |_) | | __/\n",
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
" (/\\||/ |_| \n",
"______\\||/___________________________________________ \n",
"\n",
"ID: train-259-0\n",
"Clear Text: \n",
" \ttext: HOUSTON AT PITTSBURGH\n",
" \tner_label: ['B-ORG', 'O', 'B-LOC']\n",
"Tokenized: \n",
" \ttokens: ['H', '##O', '##US', '##TO', '##N', 'AT', 'P', '##IT', '##TS', '##B', '##UR', '##G', '##H']\n",
" \toffsets: [0, 1, 2, 4, 6, 8, 11, 12, 14, 16, 17, 19, 20]\n",
" \tstart_of_word: [True, False, False, False, False, True, True, False, False, False, False, False, False]\n",
"Features: \n",
" \tinput_ids: [101, 145, 2346, 13329, 18082, 2249, 13020, 153, 12150, 11365, 2064, 19556, 2349, 3048, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tinitial_mask: [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tner_label_ids: [1, 7, 1, 1, 1, 1, 2, 9, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"_____________________________________________________\n",
"04/15/2020 20:11:46 - INFO - farm.data_handler.processor - \n",
"\n",
" .--. _____ _ \n",
" .'_\\/_'. / ____| | | \n",
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n",
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
" || /\\ ____) | (_| | | | | | | |_) | | __/\n",
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
" (/\\||/ |_| \n",
"______\\||/___________________________________________ \n",
"\n",
"ID: train-92-0\n",
"Clear Text: \n",
" \ttext: Red Star ( Yugoslavia ) beat Dinamo ( Russia ) 92-90 ( halftime\n",
" \tner_label: ['B-ORG', 'I-ORG', 'O', 'B-LOC', 'O', 'O', 'B-ORG', 'O', 'B-LOC', 'O', 'O', 'O', 'O']\n",
"Tokenized: \n",
" \ttokens: ['Red', 'Star', '(', 'Yugoslavia', ')', 'beat', 'Dinamo', '(', 'Russia', ')', '92', '-', '90', '(', 'halftime']\n",
" \toffsets: [0, 4, 9, 11, 22, 24, 29, 36, 38, 45, 47, 49, 50, 53, 55]\n",
" \tstart_of_word: [True, True, True, True, True, True, True, True, True, True, True, False, False, True, True]\n",
"Features: \n",
" \tinput_ids: [101, 2156, 2537, 113, 8575, 114, 3222, 24780, 113, 2733, 114, 5556, 118, 3078, 113, 26077, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tinitial_mask: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tner_label_ids: [1, 7, 8, 2, 9, 2, 2, 7, 2, 9, 2, 2, 1, 1, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"_____________________________________________________\n",
"Preprocessing Dataset ../data/conll03-en/dev.txt: 100%|██████████| 3250/3250 [00:03<00:00, 1028.94 Dicts/s]\n",
"04/15/2020 20:11:48 - INFO - farm.data_handler.data_silo - Loading test set from: ../data/conll03-en/test.txt\n",
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - Got ya 1 parallel workers to convert 3453 dictionaries to pytorch datasets (chunksize = 691)...\n",
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - 0 \n",
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - /w\\\n",
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - /'\\\n",
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - \n",
"Preprocessing Dataset ../data/conll03-en/test.txt: 0%| | 0/3453 [00:00<?, ? Dicts/s]04/15/2020 20:11:49 - INFO - farm.data_handler.processor - *** Show 2 random examples ***\n",
"04/15/2020 20:11:49 - INFO - farm.data_handler.processor - \n",
"\n",
" .--. _____ _ \n",
" .'_\\/_'. / ____| | | \n",
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n",
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
" || /\\ ____) | (_| | | | | | | |_) | | __/\n",
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
" (/\\||/ |_| \n",
"______\\||/___________________________________________ \n",
"\n",
"ID: train-573-0\n",
"Clear Text: \n",
" \ttext: Feyenoord 17 11 3 3 29 20 36\n",
" \tner_label: ['B-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
"Tokenized: \n",
" \ttokens: ['Fe', '##ye', '##no', '##ord', '17', '11', '3', '3', '29', '20', '36']\n",
" \toffsets: [0, 2, 4, 6, 10, 13, 16, 18, 20, 23, 26]\n",
" \tstart_of_word: [True, False, False, False, True, True, True, True, True, True, True]\n",
"Features: \n",
" \tinput_ids: [101, 11907, 4980, 2728, 6944, 1542, 1429, 124, 124, 1853, 1406, 3164, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tinitial_mask: [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tner_label_ids: [1, 7, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"_____________________________________________________\n",
"04/15/2020 20:11:49 - INFO - farm.data_handler.processor - \n",
"\n",
" .--. _____ _ \n",
" .'_\\/_'. / ____| | | \n",
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n",
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
" || /\\ ____) | (_| | | | | | | |_) | | __/\n",
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
" (/\\||/ |_| \n",
"______\\||/___________________________________________ \n",
"\n",
"ID: train-601-0\n",
"Clear Text: \n",
" \ttext: Bayern Munich 16 9 6 1 26 14 33\n",
" \tner_label: ['B-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n",
"Tokenized: \n",
" \ttokens: ['Bayern', 'Munich', '16', '9', '6', '1', '26', '14', '33']\n",
" \toffsets: [0, 7, 14, 17, 19, 21, 23, 26, 29]\n",
" \tstart_of_word: [True, True, True, True, True, True, True, True, True]\n",
"Features: \n",
" \tinput_ids: [101, 23517, 6947, 1479, 130, 127, 122, 1744, 1489, 3081, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tinitial_mask: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tner_label_ids: [1, 7, 8, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"_____________________________________________________\n",
"Preprocessing Dataset ../data/conll03-en/test.txt: 100%|██████████| 3453/3453 [00:02<00:00, 1183.91 Dicts/s]\n",
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Examples in train: 14041\n",
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Examples in dev : 3250\n",
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Examples in test : 3453\n",
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - \n",
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Longest sequence length observed after clipping: 128\n",
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Average sequence length after clipping: 21.411010611779787\n",
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Proportion clipped: 7.121999857560003e-05\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "qJEnJbStibuG",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 448,
"referenced_widgets": [
"aacfc007c1f5457c8f54b30ec66863b1",
"40ff435523b04903aad8e65507a662d3",
"2ce03980b8eb47e3bc4def1a25f9f150",
"3401c4064ff449ad9b7a785f85629ec1",
"0b3502a0bc304f51a13df06516697210",
"bc38e1ba9e1e47d0846c62ab55042118",
"58a7a998b3bd48b8b273fca4a7d628aa",
"de3726de50c84a459b04df3a2e2710bd",
"897ecef9f62e46449dc5da3677f31a51",
"30ce00d9d7254e42a75f6eb1bd3aea56",
"6e04f43cc22143a49d165fa78cdebf1d",
"34078f982a4a49fb87d858d6b926d26c",
"ee65e576985f4b9f975805df04c2e56a",
"13d9851cad514f188a2281b5f81827e0",
"ef25ad38764f424284acc290ab4208e1",
"17c4e90625074475a3cba8ad8f50a527"
]
},
"outputId": "0bef4321-55d7-4c6c-fb87-d9305be4268b"
},
"source": [
"\n",
"# 4. Create an AdaptiveModel\n",
"# a) which consists of a pretrained language model as a basis\n",
"language_model = LanguageModel.load(lang_model)\n",
"# b) and a prediction head on top that is suited for our task => NER\n",
"prediction_head = TokenClassificationHead(num_labels=len(ner_labels))\n",
"\n",
"model = AdaptiveModel(\n",
" language_model=language_model,\n",
" prediction_heads=[prediction_head],\n",
" embeds_dropout_prob=0.1,\n",
" lm_output_types=[\"per_token\"],\n",
" device=device,\n",
")\n",
"\n",
"# 5. Create an optimizer\n",
"model, optimizer, lr_schedule = initialize_optimizer(\n",
" model=model,\n",
" learning_rate=5e-5,\n",
" schedule_opts={\"name\": \"LinearWarmup\", \"warmup_proportion\": warmup_proportion},\n",
" n_batches=len(data_silo.loaders[\"train\"]),\n",
" n_epochs=n_epochs,\n",
" device=device,\n",
")\n",
"\n",
"# 6. Feed everything to the Trainer, which keeps care of growing our model into powerful plant and evaluates it from time to time\n",
"trainer = Trainer(\n",
" model=model,\n",
" optimizer=optimizer,\n",
" data_silo=data_silo,\n",
" epochs=n_epochs,\n",
" n_gpu=n_gpu,\n",
" lr_schedule=lr_schedule,\n",
" evaluate_every=evaluate_every,\n",
" device=device,\n",
")\n"
],
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": [
"04/15/2020 20:12:49 - INFO - filelock - Lock 139915603172432 acquired on /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e.lock\n",
"04/15/2020 20:12:49 - INFO - transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp0axpxuxr\n"
],
"name": "stderr"
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "aacfc007c1f5457c8f54b30ec66863b1",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Downloading', max=361, style=ProgressStyle(description_width=…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"04/15/2020 20:12:49 - INFO - transformers.file_utils - storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json in cache at /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e\n",
"04/15/2020 20:12:49 - INFO - transformers.file_utils - creating metadata file for /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e\n",
"04/15/2020 20:12:49 - INFO - filelock - Lock 139915603172432 released on /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e.lock\n",
"04/15/2020 20:12:49 - INFO - filelock - Lock 139915764977336 acquired on /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2.lock\n",
"04/15/2020 20:12:49 - INFO - transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmprdof9097\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "897ecef9f62e46449dc5da3677f31a51",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Downloading', max=435779157, style=ProgressStyle(description_…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"04/15/2020 20:12:56 - INFO - transformers.file_utils - storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin in cache at /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n",
"04/15/2020 20:12:56 - INFO - transformers.file_utils - creating metadata file for /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n",
"04/15/2020 20:12:56 - INFO - filelock - Lock 139915764977336 released on /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2.lock\n",
"04/15/2020 20:12:56 - INFO - transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin from cache at /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"04/15/2020 20:12:59 - WARNING - farm.modeling.language_model - Could not automatically detect from language model name what language it is. \n",
"\t We guess it's an *ENGLISH* model ... \n",
"\t If not: Init the language model by supplying the 'language' param.\n",
"04/15/2020 20:12:59 - INFO - farm.modeling.prediction_head - Prediction head initialized with size [768, 13]\n",
"04/15/2020 20:13:16 - INFO - farm.modeling.optimization - Loading optimizer `TransformersAdamW`: '{'correct_bias': False, 'weight_decay': 0.01, 'lr': 5e-05}'\n",
"04/15/2020 20:13:16 - INFO - farm.modeling.optimization - Using scheduler 'get_linear_schedule_with_warmup'\n",
"04/15/2020 20:13:17 - INFO - farm.modeling.optimization - Loading schedule `get_linear_schedule_with_warmup`: '{'num_training_steps': 440, 'num_warmup_steps': 176}'\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "LdUzNWzkhL9F",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "9d9420bb-996b-4804-9df1-131fabc0466b"
},
"source": [
"\n",
"# 7. Let it grow\n",
"trainer.train()\n"
],
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"text": [
"04/15/2020 20:13:19 - INFO - farm.train - \n",
" \n",
"\n",
" &&& && & && _____ _ \n",
" && &\\/&\\|& ()|/ @, && / ____| (_) \n",
" &\\/(/&/&||/& /_/)_&/_& | | __ _ __ _____ ___ _ __ __ _ \n",
" &() &\\/&|()|/&\\/ '%\" & () | | |_ | '__/ _ \\ \\ /\\ / / | '_ \\ / _` |\n",
" &_\\_&&_\\ |& |&&/&__%_/_& && | |__| | | | (_) \\ V V /| | | | | (_| |\n",
"&& && & &| &| /& & % ()& /&& \\_____|_| \\___/ \\_/\\_/ |_|_| |_|\\__, |\n",
" ()&_---()&\\&\\|&&-&&--%---()~ __/ |\n",
" && \\||| |___/\n",
" |||\n",
" |||\n",
" |||\n",
" , -=-~ .-^- _\n",
" `\n",
"\n",
"Train epoch 0/2 (Cur. train loss: 0.0387): 100%|██████████| 220/220 [02:46<00:00, 1.32it/s]\n",
"Train epoch 1/2 (Cur. train loss: 0.0323): 82%|████████▏ | 180/220 [02:15<00:28, 1.39it/s]\n",
"Evaluating: 0%| | 0/51 [00:00<?, ?it/s]\u001b[A\n",
"Evaluating: 100%|██████████| 51/51 [00:18<00:00, 2.71it/s]\n",
"04/15/2020 20:18:41 - INFO - farm.eval - \n",
"\n",
"\\\\|// \\\\|// \\\\|// \\\\|// \\\\|//\n",
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
"***************************************************\n",
"***** EVALUATION | DEV SET | AFTER 400 BATCHES *****\n",
"***************************************************\n",
"\\\\|// \\\\|// \\\\|// \\\\|// \\\\|//\n",
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
"\n",
"04/15/2020 20:18:41 - INFO - farm.eval - \n",
" _________ ner _________\n",
"04/15/2020 20:18:42 - INFO - farm.eval - loss: 0.6635722540341891\n",
"04/15/2020 20:18:42 - INFO - farm.eval - task_name: ner\n",
"04/15/2020 20:18:42 - INFO - farm.eval - seq_f1: 0.939728779507785\n",
"04/15/2020 20:18:42 - INFO - farm.eval - report: \n",
" precision recall f1-score support\n",
"\n",
" LOC 0.96 0.96 0.96 1837\n",
" MISC 0.89 0.88 0.89 922\n",
" PER 0.97 0.97 0.97 1836\n",
" ORG 0.90 0.93 0.91 1341\n",
"\n",
"micro avg 0.93 0.95 0.94 5936\n",
"macro avg 0.94 0.95 0.94 5936\n",
"\n",
"Train epoch 1/2 (Cur. train loss: 0.0081): 100%|██████████| 220/220 [03:05<00:00, 1.19it/s]\n",
"Evaluating: 100%|██████████| 54/54 [00:19<00:00, 2.71it/s]\n",
"04/15/2020 20:19:31 - INFO - farm.eval - \n",
"\n",
"\\\\|// \\\\|// \\\\|// \\\\|// \\\\|//\n",
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
"***************************************************\n",
"***** EVALUATION | TEST SET | AFTER 440 BATCHES *****\n",
"***************************************************\n",
"\\\\|// \\\\|// \\\\|// \\\\|// \\\\|//\n",
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
"\n",
"04/15/2020 20:19:31 - INFO - farm.eval - \n",
" _________ ner _________\n",
"04/15/2020 20:19:32 - INFO - farm.eval - loss: 1.3053936329987164\n",
"04/15/2020 20:19:32 - INFO - farm.eval - task_name: ner\n",
"04/15/2020 20:19:32 - INFO - farm.eval - seq_f1: 0.9011739968459787\n",
"04/15/2020 20:19:32 - INFO - farm.eval - report: \n",
" precision recall f1-score support\n",
"\n",
" ORG 0.86 0.90 0.88 1661\n",
" PER 0.96 0.95 0.95 1615\n",
" LOC 0.93 0.92 0.93 1666\n",
" MISC 0.75 0.82 0.79 702\n",
"\n",
"micro avg 0.89 0.91 0.90 5644\n",
"macro avg 0.90 0.91 0.90 5644\n",
"\n"
],
"name": "stderr"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"AdaptiveModel(\n",
" (language_model): Bert(\n",
" (model): BertModel(\n",
" (embeddings): BertEmbeddings(\n",
" (word_embeddings): Embedding(28996, 768, padding_idx=0)\n",
" (position_embeddings): Embedding(512, 768)\n",
" (token_type_embeddings): Embedding(2, 768)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (encoder): BertEncoder(\n",
" (layer): ModuleList(\n",
" (0): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (1): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (2): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (3): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (4): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (5): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (6): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (7): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (8): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (9): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (10): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (11): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (pooler): BertPooler(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (activation): Tanh()\n",
" )\n",
" )\n",
" )\n",
" (prediction_heads): ModuleList(\n",
" (0): TokenClassificationHead(\n",
" (feed_forward): FeedForwardBlock(\n",
" (feed_forward): Sequential(\n",
" (0): Linear(in_features=768, out_features=13, bias=True)\n",
" )\n",
" )\n",
" (loss_fct): CrossEntropyLoss()\n",
" )\n",
" )\n",
" (dropout): Dropout(p=0.1, inplace=False)\n",
")"
]
},
"metadata": {
"tags": []
},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "sQTxu3_1i_Tp",
"colab_type": "code",
"colab": {}
},
"source": [
"\n",
"# 8. Hooray! You have a model. Store it:\n",
"save_dir = \"saved_models_bert-en-ner3\"\n",
"model.save(save_dir)\n",
"processor.save(save_dir)\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "EvBGMJK3li9k",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "6e2fccde-a6d3-495f-c46c-4213bd3c040f"
},
"source": [
"model = Inferencer.load(save_dir)\n",
"\n",
"# 9. Load it & harvest your fruits (Inference)\n",
"basic_texts = [\n",
" {\"text\": \"I love it when I see Obama in Japan.\"},\n",
" {\"text\": \"Not limiting global temperature rise to 1.5 degrees would mean trillions of dollars in economic losses, heat extremes in all inhabited parts of the planet, die-off of large parts of the Amazon rainforest, and millions of climate refugees.\"},\n",
" {\"text\": \"August 22, 2019 Amazon’s new plastic packaging has caused outrage among customers and environmental activists who’ve branded it major step backwards.\"},\n",
"]\n",
"\n",
"result = model.inference_from_dicts(dicts=basic_texts)\n",
"print(result)\n"
],
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"text": [
"04/15/2020 20:20:12 - INFO - farm.utils - device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None\n",
"04/15/2020 20:20:12 - INFO - transformers.modeling_utils - loading weights file saved_models_bert-en-ner3/language_model.bin from cache at saved_models_bert-en-ner3/language_model.bin\n",
"04/15/2020 20:20:14 - INFO - farm.modeling.adaptive_model - Found files for loading 1 prediction heads\n",
"04/15/2020 20:20:14 - WARNING - farm.modeling.prediction_head - `layer_dims` will be deprecated in future releases\n",
"04/15/2020 20:20:14 - INFO - farm.modeling.prediction_head - Prediction head initialized with size [768, 13]\n",
"04/15/2020 20:20:14 - INFO - farm.modeling.prediction_head - Loading prediction head from saved_models_bert-en-ner3/prediction_head_0.bin\n",
"04/15/2020 20:20:14 - WARNING - farm.modeling.adaptive_model - ML logging didn't work: INVALID_PARAMETER_VALUE: Changing param value is not allowed. Param with key='lm_name' was already logged with value='bert-base-cased' for run ID='7c43c4979601470298f408ded7556409. Attempted logging new value 'saved_models_bert-en-ner3'.\n",
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - Model name 'saved_models_bert-en-ner3' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc, bert-base-german-dbmdz-cased, bert-base-german-dbmdz-uncased, bert-base-finnish-cased-v1, bert-base-finnish-uncased-v1, bert-base-dutch-cased). Assuming 'saved_models_bert-en-ner3' is a path, a model identifier, or url to a directory containing tokenizer files.\n",
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - Didn't find file saved_models_bert-en-ner3/added_tokens.json. We won't load it.\n",
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - loading file saved_models_bert-en-ner3/vocab.txt\n",
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - loading file None\n",
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - loading file saved_models_bert-en-ner3/special_tokens_map.json\n",
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - loading file saved_models_bert-en-ner3/tokenizer_config.json\n",
"04/15/2020 20:20:15 - INFO - farm.data_handler.processor - Initialized processor without tasks. Supply `metric` and `label_list` to the constructor for using the default task or add a custom task later via processor.add_task()\n",
"04/15/2020 20:20:15 - INFO - farm.utils - device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None\n",
"04/15/2020 20:20:15 - INFO - farm.infer - Got ya 1 parallel workers to do inference on dicts (chunksize = 4)...\n",
"04/15/2020 20:20:15 - INFO - farm.infer - 0 \n",
"04/15/2020 20:20:15 - INFO - farm.infer - /w\\\n",
"04/15/2020 20:20:15 - INFO - farm.infer - /'\\\n",
"04/15/2020 20:20:15 - INFO - farm.infer - \n",
"04/15/2020 20:20:15 - WARNING - farm.data_handler.input_features - [Task: ner] Could not convert labels to ids via label_list!\n",
"If your are running in *inference* mode: Don't worry!\n",
"If you are running in *training* mode: Verify you are supplying a proper label list to your processor and check that labels in input data are correct.\n",
"04/15/2020 20:20:15 - WARNING - farm.data_handler.input_features - [Task: ner] Could not convert labels to ids via label_list!\n",
"If your are running in *inference* mode: Don't worry!\n",
"If you are running in *training* mode: Verify you are supplying a proper label list to your processor and check that labels in input data are correct.\n",
"04/15/2020 20:20:15 - WARNING - farm.data_handler.input_features - [Task: ner] Could not convert labels to ids via label_list!\n",
"If your are running in *inference* mode: Don't worry!\n",
"If you are running in *training* mode: Verify you are supplying a proper label list to your processor and check that labels in input data are correct.\n",
"04/15/2020 20:20:15 - INFO - farm.data_handler.processor - *** Show 2 random examples ***\n",
"04/15/2020 20:20:15 - INFO - farm.data_handler.processor - \n",
"\n",
" .--. _____ _ \n",
" .'_\\/_'. / ____| | | \n",
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n",
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
" || /\\ ____) | (_| | | | | | | |_) | | __/\n",
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
" (/\\||/ |_| \n",
"______\\||/___________________________________________ \n",
"\n",
"ID: train-2-0\n",
"Clear Text: \n",
" \ttext: August 22, 2019 Amazon’s new plastic packaging has caused outrage among customers and environmental activists who’ve branded it major step backwards.\n",
"Tokenized: \n",
" \ttokens: ['August', '22', ',', '2019', 'Amazon', '’', 's', 'new', 'plastic', 'packaging', 'has', 'caused', 'outrage', 'among', 'customers', 'and', 'environmental', 'activists', 'who', '’', 've', 'branded', 'it', 'major', 'step', 'backwards', '.']\n",
" \toffsets: [0, 7, 9, 11, 16, 22, 23, 25, 29, 37, 47, 51, 58, 66, 72, 82, 86, 100, 110, 113, 114, 117, 125, 128, 134, 139, 148]\n",
" \tstart_of_word: [True, True, False, True, True, False, False, True, True, True, True, True, True, True, True, True, True, True, True, False, False, True, True, True, True, True, False]\n",
"Features: \n",
" \tinput_ids: [101, 1360, 1659, 117, 10351, 9786, 787, 188, 1207, 5828, 17019, 1144, 2416, 22052, 1621, 5793, 1105, 4801, 10254, 1150, 787, 1396, 11450, 1122, 1558, 2585, 11316, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tinitial_mask: [0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"_____________________________________________________\n",
"04/15/2020 20:20:15 - INFO - farm.data_handler.processor - \n",
"\n",
" .--. _____ _ \n",
" .'_\\/_'. / ____| | | \n",
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n",
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n",
" || /\\ ____) | (_| | | | | | | |_) | | __/\n",
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n",
" (/\\||/ |_| \n",
"______\\||/___________________________________________ \n",
"\n",
"ID: train-0-0\n",
"Clear Text: \n",
" \ttext: I love it when I see Obama in Japan.\n",
"Tokenized: \n",
" \ttokens: ['I', 'love', 'it', 'when', 'I', 'see', 'Obama', 'in', 'Japan', '.']\n",
" \toffsets: [0, 2, 7, 10, 15, 17, 21, 27, 30, 35]\n",
" \tstart_of_word: [True, True, True, True, True, True, True, True, True, False]\n",
"Features: \n",
" \tinput_ids: [101, 146, 1567, 1122, 1165, 146, 1267, 7661, 1107, 1999, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
" \tinitial_mask: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"_____________________________________________________\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"[{'task': 'ner', 'predictions': [{'start': 21, 'end': 26, 'context': 'Obama', 'label': 'PER', 'probability': 0.99587303}, {'start': 30, 'end': 36, 'context': 'Japan.', 'label': 'LOC', 'probability': 0.9992192}, {'start': 186, 'end': 192, 'context': 'Amazon', 'label': 'LOC', 'probability': 0.99963367}, {'start': 16, 'end': 24, 'context': 'Amazon’s', 'label': 'ORG', 'probability': 0.99936336}]}]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "6kiNl6BZlZwt",
"colab_type": "code",
"colab": {}
},
"source": [
"print(result)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "dGFLE2z9l6qy",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment