Last active
April 15, 2020 20:22
-
-
Save renaud/2317b0c8e6d4ced7abd8088f5594c547 to your computer and use it in GitHub Desktop.
FARM_en_NER.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "FARM_en_NER.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"mount_file_id": "1qqewywPJDcrT4vTNBbkEJGO6ZUHievNO", | |
"authorship_tag": "ABX9TyNUVWmo35Cs8dOpEfoZBRt6", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"accelerator": "GPU", | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"1b39397a3b7c4f46af0f78c302af8b73": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_1954a748853b45018b25be2dccca4e80", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_c60b9896692f41d0ac6f25bf6aa4ea24", | |
"IPY_MODEL_9855be6e35d047b1806ca63798ef123e" | |
] | |
} | |
}, | |
"1954a748853b45018b25be2dccca4e80": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"c60b9896692f41d0ac6f25bf6aa4ea24": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "IntProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_7c9049195247414fb19297ab266e1539", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "IntProgressModel", | |
"bar_style": "success", | |
"max": 213450, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 213450, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_7d7eb476372e4a3bb7f47b25431a6150" | |
} | |
}, | |
"9855be6e35d047b1806ca63798ef123e": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_bbbd1a43688549968fd2ce8a924e403b", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 213k/213k [00:00<00:00, 1.80MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_1d0391e4ee324d25a8cf93f4758fd62f" | |
} | |
}, | |
"7c9049195247414fb19297ab266e1539": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"7d7eb476372e4a3bb7f47b25431a6150": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"bbbd1a43688549968fd2ce8a924e403b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"1d0391e4ee324d25a8cf93f4758fd62f": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"aacfc007c1f5457c8f54b30ec66863b1": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_40ff435523b04903aad8e65507a662d3", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_2ce03980b8eb47e3bc4def1a25f9f150", | |
"IPY_MODEL_3401c4064ff449ad9b7a785f85629ec1" | |
] | |
} | |
}, | |
"40ff435523b04903aad8e65507a662d3": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"2ce03980b8eb47e3bc4def1a25f9f150": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "IntProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_0b3502a0bc304f51a13df06516697210", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "IntProgressModel", | |
"bar_style": "success", | |
"max": 361, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 361, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_bc38e1ba9e1e47d0846c62ab55042118" | |
} | |
}, | |
"3401c4064ff449ad9b7a785f85629ec1": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_58a7a998b3bd48b8b273fca4a7d628aa", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 361/361 [00:00<00:00, 2.83kB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_de3726de50c84a459b04df3a2e2710bd" | |
} | |
}, | |
"0b3502a0bc304f51a13df06516697210": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"bc38e1ba9e1e47d0846c62ab55042118": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"58a7a998b3bd48b8b273fca4a7d628aa": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"de3726de50c84a459b04df3a2e2710bd": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"897ecef9f62e46449dc5da3677f31a51": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_30ce00d9d7254e42a75f6eb1bd3aea56", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_6e04f43cc22143a49d165fa78cdebf1d", | |
"IPY_MODEL_34078f982a4a49fb87d858d6b926d26c" | |
] | |
} | |
}, | |
"30ce00d9d7254e42a75f6eb1bd3aea56": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"6e04f43cc22143a49d165fa78cdebf1d": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "IntProgressModel", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_ee65e576985f4b9f975805df04c2e56a", | |
"_dom_classes": [], | |
"description": "Downloading: 100%", | |
"_model_name": "IntProgressModel", | |
"bar_style": "success", | |
"max": 435779157, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 435779157, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_13d9851cad514f188a2281b5f81827e0" | |
} | |
}, | |
"34078f982a4a49fb87d858d6b926d26c": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_ef25ad38764f424284acc290ab4208e1", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 436M/436M [00:07<00:00, 58.3MB/s]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_17c4e90625074475a3cba8ad8f50a527" | |
} | |
}, | |
"ee65e576985f4b9f975805df04c2e56a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "initial", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"13d9851cad514f188a2281b5f81827e0": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"ef25ad38764f424284acc290ab4208e1": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"17c4e90625074475a3cba8ad8f50a527": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/renaud/2317b0c8e6d4ced7abd8088f5594c547/farm_en_ner.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "xyO45RHBgjI0", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 139 | |
}, | |
"outputId": "1565a4ae-7f82-4d8c-978a-1a5c0c2edf66" | |
}, | |
"source": [ | |
"# Clone the FARM source tree; usage guide: https://github.com/deepset-ai/FARM#basic-usage\n", | |
"!git clone https://github.com/deepset-ai/FARM.git" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Cloning into 'FARM'...\n", | |
"remote: Enumerating objects: 85, done.\u001b[K\n", | |
"remote: Counting objects: 100% (85/85), done.\u001b[K\n", | |
"remote: Compressing objects: 100% (71/71), done.\u001b[K\n", | |
"remote: Total 4933 (delta 47), reused 32 (delta 14), pack-reused 4848\u001b[K\n", | |
"Receiving objects: 100% (4933/4933), 64.95 MiB | 24.13 MiB/s, done.\n", | |
"Resolving deltas: 100% (3668/3668), done.\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qXzI39Jd1SsY", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "18bd6ed9-4ccd-4114-8804-80a05843b162" | |
}, | |
"source": [ | |
"# Enter the cloned repo. Explicit %cd magic: a bare `cd` only works while IPython automagic is enabled\n", | |
"# and no Python variable named `cd` exists.\n", | |
"%cd FARM\n" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/content/FARM\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "eunTBTOW1tBb", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 1000 | |
}, | |
"outputId": "7db64161-8e55-45f1-ebc2-a93f48ed548d" | |
}, | |
"source": [ | |
"# Install FARM's dependencies, then FARM itself in editable mode from the current directory.\n", | |
"# %pip (rather than !pip) installs into the environment of the running kernel,\n", | |
"# not whichever `pip` executable happens to be first on the shell PATH.\n", | |
"%pip install -r requirements.txt\n", | |
"%pip install --editable .\n" | |
], | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 2)) (46.1.3)\n", | |
"Requirement already satisfied: wheel in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 3)) (0.34.2)\n", | |
"Requirement already satisfied: torch>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 5)) (1.4.0)\n", | |
"Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 7)) (4.38.0)\n", | |
"Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 9)) (1.12.38)\n", | |
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 11)) (2.21.0)\n", | |
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 13)) (1.4.1)\n", | |
"Requirement already satisfied: sklearn in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 14)) (0.0)\n", | |
"Collecting seqeval\n", | |
" Downloading https://files.pythonhosted.org/packages/34/91/068aca8d60ce56dd9ba4506850e876aba5e66a6f2f29aa223224b50df0de/seqeval-0.0.12.tar.gz\n", | |
"Collecting mlflow==1.0.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/01/ec/8c9448968d4662e8354b9c3a62e635f8929ed507a45af3d9fdb84be51270/mlflow-1.0.0-py3-none-any.whl (47.7MB)\n", | |
"\u001b[K |████████████████████████████████| 47.7MB 61kB/s \n", | |
"\u001b[?25hCollecting transformers==2.7.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/37/ba/dda44bbf35b071441635708a3dd568a5ca6bf29f77389f7c7c6818ae9498/transformers-2.7.0-py3-none-any.whl (544kB)\n", | |
"\u001b[K |████████████████████████████████| 552kB 44.7MB/s \n", | |
"\u001b[?25hCollecting dotmap==1.3.0\n", | |
" Downloading https://files.pythonhosted.org/packages/fa/eb/ee5f0358a9e0ede90308d8f34e697e122f191c2702dc4f614eca7770b1eb/dotmap-1.3.0-py3-none-any.whl\n", | |
"Collecting Werkzeug==0.16.1\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/c2/e4/a859d2fe516f466642fa5c6054fd9646271f9da26b0cac0d2f37fc858c8f/Werkzeug-0.16.1-py2.py3-none-any.whl (327kB)\n", | |
"\u001b[K |████████████████████████████████| 327kB 33.7MB/s \n", | |
"\u001b[?25hRequirement already satisfied: flask in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 24)) (1.1.2)\n", | |
"Collecting flask-restplus\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/c2/a6/b17c848771f96ad039ad9e3ea275e842a16c39c4f3eb9f60ee330b20b6c2/flask_restplus-0.13.0-py2.py3-none-any.whl (2.5MB)\n", | |
"\u001b[K |████████████████████████████████| 2.5MB 46.2MB/s \n", | |
"\u001b[?25hCollecting flask-cors\n", | |
" Downloading https://files.pythonhosted.org/packages/78/38/e68b11daa5d613e3a91e4bf3da76c94ac9ee0d9cd515af9c1ab80d36f709/Flask_Cors-3.0.8-py2.py3-none-any.whl\n", | |
"Requirement already satisfied: dill in /usr/local/lib/python3.6/dist-packages (from -r requirements.txt (line 29)) (0.3.1.1)\n", | |
"Collecting onnxruntime\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/69/39/404df5ee608c548dacde43a17faf0248b183fa6163cf9c06aca6a511d760/onnxruntime-1.2.0-cp36-cp36m-manylinux1_x86_64.whl (3.7MB)\n", | |
"\u001b[K |████████████████████████████████| 3.7MB 47.1MB/s \n", | |
"\u001b[?25hRequirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->-r requirements.txt (line 9)) (0.3.3)\n", | |
"Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->-r requirements.txt (line 9)) (0.9.5)\n", | |
"Requirement already satisfied: botocore<1.16.0,>=1.15.38 in /usr/local/lib/python3.6/dist-packages (from boto3->-r requirements.txt (line 9)) (1.15.38)\n", | |
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (1.24.3)\n", | |
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (2.8)\n", | |
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (3.0.4)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->-r requirements.txt (line 11)) (2020.4.5.1)\n", | |
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from scipy>=1.3.2->-r requirements.txt (line 13)) (1.18.2)\n", | |
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from sklearn->-r requirements.txt (line 14)) (0.22.2.post1)\n", | |
"Requirement already satisfied: Keras>=2.2.4 in /usr/local/lib/python3.6/dist-packages (from seqeval->-r requirements.txt (line 16)) (2.3.1)\n", | |
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (2.8.1)\n", | |
"Collecting simplejson\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/98/87/a7b98aa9256c8843f92878966dc3d8d914c14aad97e2c5ce4798d5743e07/simplejson-3.17.0.tar.gz (83kB)\n", | |
"\u001b[K |████████████████████████████████| 92kB 11.2MB/s \n", | |
"\u001b[?25hRequirement already satisfied: sqlparse in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (0.3.1)\n", | |
"Collecting querystring-parser\n", | |
" Downloading https://files.pythonhosted.org/packages/4a/fa/f54f5662e0eababf0c49e92fd94bf178888562c0e7b677c8941bbbcd1bd6/querystring_parser-1.2.4.tar.gz\n", | |
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.12.0)\n", | |
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (3.13)\n", | |
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (0.3)\n", | |
"Collecting docker>=3.6.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/58/74/379a9d30b1620def158c40b88c43e01c1936a287ebb97afab0699c601c57/docker-4.2.0-py2.py3-none-any.whl (143kB)\n", | |
"\u001b[K |████████████████████████████████| 153kB 43.3MB/s \n", | |
"\u001b[?25hCollecting alembic\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/60/1e/cabc75a189de0fbb2841d0975243e59bde8b7822bacbb95008ac6fe9ad47/alembic-1.4.2.tar.gz (1.1MB)\n", | |
"\u001b[K |████████████████████████████████| 1.1MB 43.5MB/s \n", | |
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", | |
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", | |
" Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n", | |
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (7.1.1)\n", | |
"Collecting databricks-cli>=0.8.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/49/d1/fe0ba3d5c2b4b76ec035aa243bbc2fd0d60607a391f192ebe1656e17a4e2/databricks-cli-0.10.0.tar.gz (45kB)\n", | |
"\u001b[K |████████████████████████████████| 51kB 6.9MB/s \n", | |
"\u001b[?25hRequirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (3.10.0)\n", | |
"Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.3.16)\n", | |
"Requirement already satisfied: cloudpickle in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.3.0)\n", | |
"Collecting gunicorn\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/69/ca/926f7cd3a2014b16870086b2d0fdc84a9e49473c68a8dff8b57f7c156f43/gunicorn-20.0.4-py2.py3-none-any.whl (77kB)\n", | |
"\u001b[K |████████████████████████████████| 81kB 9.5MB/s \n", | |
"\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->-r requirements.txt (line 17)) (1.0.3)\n", | |
"Collecting gitpython>=2.1.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/19/1a/0df85d2bddbca33665d2148173d3281b290ac054b5f50163ea735740ac7b/GitPython-3.1.1-py3-none-any.whl (450kB)\n", | |
"\u001b[K |████████████████████████████████| 460kB 42.3MB/s \n", | |
"\u001b[?25hCollecting sacremoses\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/99/50/93509f906a40bffd7d175f97fd75ea328ad9bd91f48f59c4bd084c94a25e/sacremoses-0.0.41.tar.gz (883kB)\n", | |
"\u001b[K |████████████████████████████████| 890kB 38.4MB/s \n", | |
"\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->-r requirements.txt (line 19)) (2019.12.20)\n", | |
"Collecting tokenizers==0.5.2\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/d1/3f/73c881ea4723e43c1e9acf317cf407fab3a278daab3a69c98dcac511c04f/tokenizers-0.5.2-cp36-cp36m-manylinux1_x86_64.whl (3.7MB)\n", | |
"\u001b[K |████████████████████████████████| 3.7MB 41.4MB/s \n", | |
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->-r requirements.txt (line 19)) (3.0.12)\n", | |
"Collecting sentencepiece\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/f4/2d5214cbf13d06e7cb2c20d84115ca25b53ea76fa1f0ade0e3c9749de214/sentencepiece-0.1.85-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)\n", | |
"\u001b[K |████████████████████████████████| 1.0MB 35.2MB/s \n", | |
"\u001b[?25hRequirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->-r requirements.txt (line 19)) (0.7)\n", | |
"Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from flask->-r requirements.txt (line 24)) (1.1.0)\n", | |
"Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from flask->-r requirements.txt (line 24)) (2.11.1)\n", | |
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.6/dist-packages (from flask-restplus->-r requirements.txt (line 25)) (2.6.0)\n", | |
"Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from flask-restplus->-r requirements.txt (line 25)) (2018.9)\n", | |
"Collecting aniso8601>=0.82\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/eb/e4/787e104b58eadc1a710738d4e418d7e599e4e778e52cb8e5d5ef6ddd5833/aniso8601-8.0.0-py2.py3-none-any.whl (43kB)\n", | |
"\u001b[K |████████████████████████████████| 51kB 7.5MB/s \n", | |
"\u001b[?25hCollecting onnx>=1.2.3\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/f5/f4/e126b60d109ad1e80020071484b935980b7cce1e4796073aab086a2d6902/onnx-1.6.0-cp36-cp36m-manylinux1_x86_64.whl (4.8MB)\n", | |
"\u001b[K |████████████████████████████████| 4.8MB 34.7MB/s \n", | |
"\u001b[?25hRequirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.38->boto3->-r requirements.txt (line 9)) (0.15.2)\n", | |
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->sklearn->-r requirements.txt (line 14)) (0.14.1)\n", | |
"Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->-r requirements.txt (line 16)) (1.0.8)\n", | |
"Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->-r requirements.txt (line 16)) (2.10.0)\n", | |
"Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->-r requirements.txt (line 16)) (1.1.0)\n", | |
"Collecting websocket-client>=0.32.0\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/4c/5f/f61b420143ed1c8dc69f9eaec5ff1ac36109d52c80de49d66e0c36c3dfdf/websocket_client-0.57.0-py2.py3-none-any.whl (200kB)\n", | |
"\u001b[K |████████████████████████████████| 204kB 49.1MB/s \n", | |
"\u001b[?25hCollecting python-editor>=0.3\n", | |
" Downloading https://files.pythonhosted.org/packages/c6/d3/201fc3abe391bbae6606e6f1d598c15d367033332bd54352b12f35513717/python_editor-1.0.4-py3-none-any.whl\n", | |
"Collecting Mako\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/50/78/f6ade1e18aebda570eed33b7c534378d9659351cadce2fcbc7b31be5f615/Mako-1.1.2-py2.py3-none-any.whl (75kB)\n", | |
"\u001b[K |████████████████████████████████| 81kB 9.8MB/s \n", | |
"\u001b[?25hRequirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.0->mlflow==1.0.0->-r requirements.txt (line 17)) (0.8.7)\n", | |
"Collecting configparser>=0.3.5\n", | |
" Downloading https://files.pythonhosted.org/packages/4b/6b/01baa293090240cf0562cc5eccb69c6f5006282127f2b846fad011305c79/configparser-5.0.0-py3-none-any.whl\n", | |
"Collecting gitdb<5,>=4.0.1\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/52/ca35448b56c53a079d3ffe18b1978c6e424f6d4df02404877094c89f5bfb/gitdb-4.0.4-py3-none-any.whl (63kB)\n", | |
"\u001b[K |████████████████████████████████| 71kB 10.2MB/s \n", | |
"\u001b[?25hRequirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->flask->-r requirements.txt (line 24)) (1.1.1)\n", | |
"Requirement already satisfied: typing-extensions>=3.6.2.1 in /usr/local/lib/python3.6/dist-packages (from onnx>=1.2.3->onnxruntime->-r requirements.txt (line 30)) (3.6.6)\n", | |
"Collecting smmap<4,>=3.0.1\n", | |
" Downloading https://files.pythonhosted.org/packages/27/b1/e379cfb7c07bbf8faee29c4a1a2469dbea525f047c2b454c4afdefa20a30/smmap-3.0.2-py2.py3-none-any.whl\n", | |
"Building wheels for collected packages: alembic\n", | |
" Building wheel for alembic (PEP 517) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for alembic: filename=alembic-1.4.2-cp36-none-any.whl size=159543 sha256=0891797d8c0abd6a76807cafacf33ef9e161a7f2b83f9a95724a3803bc2c7a37\n", | |
" Stored in directory: /root/.cache/pip/wheels/1f/04/83/76023f7a4c14688c0b5c2682a96392cfdd3ee4449eaaa287ef\n", | |
"Successfully built alembic\n", | |
"Building wheels for collected packages: seqeval, simplejson, querystring-parser, databricks-cli, sacremoses\n", | |
" Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for seqeval: filename=seqeval-0.0.12-cp36-none-any.whl size=7424 sha256=b40eb771b23d613b9bd479540b94e508183d8fce88457fc51cd63f885c644a83\n", | |
" Stored in directory: /root/.cache/pip/wheels/4f/32/0a/df3b340a82583566975377d65e724895b3fad101a3fb729f68\n", | |
" Building wheel for simplejson (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for simplejson: filename=simplejson-3.17.0-cp36-cp36m-linux_x86_64.whl size=114206 sha256=b916793fe2b66118b83b28018acf3970123a30be008b1712d502dcbc45e65d7b\n", | |
" Stored in directory: /root/.cache/pip/wheels/86/c0/83/dcd0339abb2640544bb8e0938aab2d069cef55e5647ce6e097\n", | |
" Building wheel for querystring-parser (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for querystring-parser: filename=querystring_parser-1.2.4-cp36-none-any.whl size=7079 sha256=e3706658175beab84332d9595bb8412240ba680bf5cd35d8b4c2d22bd93ea44a\n", | |
" Stored in directory: /root/.cache/pip/wheels/1e/41/34/23ebf5d1089a9aed847951e0ee375426eb4ad0a7079d88d41e\n", | |
" Building wheel for databricks-cli (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for databricks-cli: filename=databricks_cli-0.10.0-cp36-none-any.whl size=84285 sha256=f5cf7160929d401610bdbabc1b61f3596e26f43d90605c83955e8685ff2b19ad\n", | |
" Stored in directory: /root/.cache/pip/wheels/1e/e5/2d/a19c0bfd38005176063f130d72de17cb3d2d32c0ee384e7493\n", | |
" Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for sacremoses: filename=sacremoses-0.0.41-cp36-none-any.whl size=893334 sha256=4847c63d318afc68ec3491f90c2538cec9a07f8b35f525ad57f31c208e38b215\n", | |
" Stored in directory: /root/.cache/pip/wheels/22/5a/d4/b020a81249de7dc63758a34222feaa668dbe8ebfe9170cc9b1\n", | |
"Successfully built seqeval simplejson querystring-parser databricks-cli sacremoses\n", | |
"Installing collected packages: seqeval, simplejson, querystring-parser, websocket-client, docker, python-editor, Mako, alembic, configparser, databricks-cli, gunicorn, smmap, gitdb, gitpython, mlflow, sacremoses, tokenizers, sentencepiece, transformers, dotmap, Werkzeug, aniso8601, flask-restplus, flask-cors, onnx, onnxruntime\n", | |
" Found existing installation: Werkzeug 1.0.1\n", | |
" Uninstalling Werkzeug-1.0.1:\n", | |
" Successfully uninstalled Werkzeug-1.0.1\n", | |
"Successfully installed Mako-1.1.2 Werkzeug-0.16.1 alembic-1.4.2 aniso8601-8.0.0 configparser-5.0.0 databricks-cli-0.10.0 docker-4.2.0 dotmap-1.3.0 flask-cors-3.0.8 flask-restplus-0.13.0 gitdb-4.0.4 gitpython-3.1.1 gunicorn-20.0.4 mlflow-1.0.0 onnx-1.6.0 onnxruntime-1.2.0 python-editor-1.0.4 querystring-parser-1.2.4 sacremoses-0.0.41 sentencepiece-0.1.85 seqeval-0.0.12 simplejson-3.17.0 smmap-3.0.2 tokenizers-0.5.2 transformers-2.7.0 websocket-client-0.57.0\n", | |
"Obtaining file:///content/FARM\n", | |
"Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (46.1.3)\n", | |
"Requirement already satisfied: wheel in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.34.2)\n", | |
"Requirement already satisfied: torch>=1.2.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.4.0)\n", | |
"Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (4.38.0)\n", | |
"Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.12.38)\n", | |
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (2.21.0)\n", | |
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.4.1)\n", | |
"Requirement already satisfied: sklearn in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.0)\n", | |
"Requirement already satisfied: seqeval in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.0.12)\n", | |
"Requirement already satisfied: mlflow==1.0.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.0.0)\n", | |
"Requirement already satisfied: transformers==2.7.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (2.7.0)\n", | |
"Requirement already satisfied: dotmap==1.3.0 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.3.0)\n", | |
"Requirement already satisfied: Werkzeug==0.16.1 in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.16.1)\n", | |
"Requirement already satisfied: flask in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.1.2)\n", | |
"Requirement already satisfied: flask-restplus in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.13.0)\n", | |
"Requirement already satisfied: flask-cors in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (3.0.8)\n", | |
"Requirement already satisfied: dill in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (0.3.1.1)\n", | |
"Requirement already satisfied: onnxruntime in /usr/local/lib/python3.6/dist-packages (from farm==0.4.2) (1.2.0)\n", | |
"Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->farm==0.4.2) (0.9.5)\n", | |
"Requirement already satisfied: botocore<1.16.0,>=1.15.38 in /usr/local/lib/python3.6/dist-packages (from boto3->farm==0.4.2) (1.15.38)\n", | |
"Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->farm==0.4.2) (0.3.3)\n", | |
"Requirement already satisfied: idna<2.9,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (2.8)\n", | |
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (1.24.3)\n", | |
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (3.0.4)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->farm==0.4.2) (2020.4.5.1)\n", | |
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from scipy>=1.3.2->farm==0.4.2) (1.18.2)\n", | |
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from sklearn->farm==0.4.2) (0.22.2.post1)\n", | |
"Requirement already satisfied: Keras>=2.2.4 in /usr/local/lib/python3.6/dist-packages (from seqeval->farm==0.4.2) (2.3.1)\n", | |
"Requirement already satisfied: cloudpickle in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.3.0)\n", | |
"Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.10.0)\n", | |
"Requirement already satisfied: databricks-cli>=0.8.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (0.10.0)\n", | |
"Requirement already satisfied: simplejson in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.17.0)\n", | |
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.12.0)\n", | |
"Requirement already satisfied: gunicorn in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (20.0.4)\n", | |
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.13)\n", | |
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (0.3)\n", | |
"Requirement already satisfied: sqlparse in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (0.3.1)\n", | |
"Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.3.16)\n", | |
"Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.0.3)\n", | |
"Requirement already satisfied: gitpython>=2.1.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (3.1.1)\n", | |
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (7.1.1)\n", | |
"Requirement already satisfied: docker>=3.6.0 in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (4.2.0)\n", | |
"Requirement already satisfied: alembic in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.4.2)\n", | |
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (2.8.1)\n", | |
"Requirement already satisfied: querystring-parser in /usr/local/lib/python3.6/dist-packages (from mlflow==1.0.0->farm==0.4.2) (1.2.4)\n", | |
"Requirement already satisfied: dataclasses; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.7)\n", | |
"Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (3.0.12)\n", | |
"Requirement already satisfied: tokenizers==0.5.2 in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.5.2)\n", | |
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (2019.12.20)\n", | |
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.1.85)\n", | |
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.6/dist-packages (from transformers==2.7.0->farm==0.4.2) (0.0.41)\n", | |
"Requirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.6/dist-packages (from flask->farm==0.4.2) (2.11.1)\n", | |
"Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.6/dist-packages (from flask->farm==0.4.2) (1.1.0)\n", | |
"Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from flask-restplus->farm==0.4.2) (2018.9)\n", | |
"Requirement already satisfied: aniso8601>=0.82 in /usr/local/lib/python3.6/dist-packages (from flask-restplus->farm==0.4.2) (8.0.0)\n", | |
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.6/dist-packages (from flask-restplus->farm==0.4.2) (2.6.0)\n", | |
"Requirement already satisfied: onnx>=1.2.3 in /usr/local/lib/python3.6/dist-packages (from onnxruntime->farm==0.4.2) (1.6.0)\n", | |
"Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.16.0,>=1.15.38->boto3->farm==0.4.2) (0.15.2)\n", | |
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->sklearn->farm==0.4.2) (0.14.1)\n", | |
"Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->farm==0.4.2) (2.10.0)\n", | |
"Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->farm==0.4.2) (1.1.0)\n", | |
"Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/dist-packages (from Keras>=2.2.4->seqeval->farm==0.4.2) (1.0.8)\n", | |
"Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.0->mlflow==1.0.0->farm==0.4.2) (0.8.7)\n", | |
"Requirement already satisfied: configparser>=0.3.5 in /usr/local/lib/python3.6/dist-packages (from databricks-cli>=0.8.0->mlflow==1.0.0->farm==0.4.2) (5.0.0)\n", | |
"Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.6/dist-packages (from gitpython>=2.1.0->mlflow==1.0.0->farm==0.4.2) (4.0.4)\n", | |
"Requirement already satisfied: websocket-client>=0.32.0 in /usr/local/lib/python3.6/dist-packages (from docker>=3.6.0->mlflow==1.0.0->farm==0.4.2) (0.57.0)\n", | |
"Requirement already satisfied: Mako in /usr/local/lib/python3.6/dist-packages (from alembic->mlflow==1.0.0->farm==0.4.2) (1.1.2)\n", | |
"Requirement already satisfied: python-editor>=0.3 in /usr/local/lib/python3.6/dist-packages (from alembic->mlflow==1.0.0->farm==0.4.2) (1.0.4)\n", | |
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from Jinja2>=2.10.1->flask->farm==0.4.2) (1.1.1)\n", | |
"Requirement already satisfied: typing-extensions>=3.6.2.1 in /usr/local/lib/python3.6/dist-packages (from onnx>=1.2.3->onnxruntime->farm==0.4.2) (3.6.6)\n", | |
"Requirement already satisfied: smmap<4,>=3.0.1 in /usr/local/lib/python3.6/dist-packages (from gitdb<5,>=4.0.1->gitpython>=2.1.0->mlflow==1.0.0->farm==0.4.2) (3.0.2)\n", | |
"Installing collected packages: farm\n", | |
" Running setup.py develop for farm\n", | |
"Successfully installed farm\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "kBzm4YkZg6xl", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 364 | |
}, | |
"outputId": "4040f151-0e2e-4ead-b6f2-39fe7bdd56b1" | |
}, | |
"source": [ | |
"import logging\n", | |
"from pathlib import Path\n", | |
"\n", | |
"from farm.data_handler.data_silo import DataSilo\n", | |
"from farm.data_handler.processor import NERProcessor\n", | |
"from farm.modeling.optimization import initialize_optimizer\n", | |
"from farm.infer import Inferencer\n", | |
"from farm.modeling.adaptive_model import AdaptiveModel\n", | |
"from farm.modeling.language_model import LanguageModel\n", | |
"from farm.modeling.prediction_head import TokenClassificationHead\n", | |
"from farm.modeling.tokenization import Tokenizer\n", | |
"from farm.train import Trainer\n", | |
"from farm.utils import set_all_seeds, MLFlowLogger, initialize_device_settings\n", | |
"\n", | |
"logging.basicConfig(\n", | |
" format=\"%(asctime)s - %(levelname)s - %(name)s - %(message)s\",\n", | |
" datefmt=\"%m/%d/%Y %H:%M:%S\",\n", | |
" level=logging.INFO,\n", | |
")\n", | |
"\n", | |
"ml_logger = MLFlowLogger(tracking_uri=\"https://public-mlflow.deepset.ai/\")\n", | |
"ml_logger.init_experiment(experiment_name=\"Public_FARM\", run_name=\"Ren_en-ner-colb2\")\n" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:11:05 - INFO - transformers.file_utils - PyTorch version 1.4.0 available.\n", | |
"04/15/2020 20:11:06 - INFO - transformers.file_utils - TensorFlow version 2.2.0-rc2 available.\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
" __ __ _ _ \n", | |
" \\ \\ / / | | | | \n", | |
" \\ \\ /\\ / /__| | ___ ___ _ __ ___ ___ | |_ ___ \n", | |
" \\ \\/ \\/ / _ \\ |/ __/ _ \\| '_ ` _ \\ / _ \\ | __/ _ \\ \n", | |
" \\ /\\ / __/ | (_| (_) | | | | | | __/ | || (_) |\n", | |
" \\/ \\/ \\___|_|\\___\\___/|_| |_| |_|\\___| \\__\\___/ \n", | |
" ______ _____ __ __ \n", | |
" | ____/\\ | __ \\| \\/ | _.-^-._ .--.\n", | |
" | |__ / \\ | |__) | \\ / | .-' _ '-. |__|\n", | |
" | __/ /\\ \\ | _ /| |\\/| | / |_| \\| |\n", | |
" | | / ____ \\| | \\ \\| | | | / \\ |\n", | |
" |_|/_/ \\_\\_| \\_\\_| |_| /| _____ |\\ |\n", | |
" | |==|==| | |\n", | |
"|---||---|---|---|---|---|---|---|---| |--|--| | |\n", | |
"|---||---|---|---|---|---|---|---|---| |==|==| | |\n", | |
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
" \n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "nQqRPoiahHTE", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 1000, | |
"referenced_widgets": [ | |
"1b39397a3b7c4f46af0f78c302af8b73", | |
"1954a748853b45018b25be2dccca4e80", | |
"c60b9896692f41d0ac6f25bf6aa4ea24", | |
"9855be6e35d047b1806ca63798ef123e", | |
"7c9049195247414fb19297ab266e1539", | |
"7d7eb476372e4a3bb7f47b25431a6150", | |
"bbbd1a43688549968fd2ce8a924e403b", | |
"1d0391e4ee324d25a8cf93f4758fd62f" | |
] | |
}, | |
"outputId": "31a5af11-0846-4499-b9c9-b4de618877ea" | |
}, | |
"source": [ | |
"##########################\n", | |
"########## Settings\n", | |
"##########################\n", | |
"set_all_seeds(seed=42)\n", | |
"device, n_gpu = initialize_device_settings(use_cuda=True)#, local_rank=-1, use_amp=None)\n", | |
"n_epochs = 2\n", | |
"# TODO gradient_accumulation_steps?\n", | |
"# TODO layer_dims?\n", | |
"warmup_proportion = 0.4\n", | |
"batch_size = 64\n", | |
"evaluate_every = 400\n", | |
"lang_model = \"bert-base-cased\"\n", | |
"do_lower_case = False\n", | |
"\n", | |
"# 1.Create a tokenizer\n", | |
"tokenizer = Tokenizer.load(\n", | |
" pretrained_model_name_or_path=lang_model, do_lower_case=do_lower_case\n", | |
")\n", | |
"\n", | |
"# 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset\n", | |
"ner_labels = [\"[PAD]\", \"X\", \"O\", \"B-MISC\", \"I-MISC\", \"B-PER\", \"I-PER\", \"B-ORG\", \"I-ORG\", \"B-LOC\", \"I-LOC\", \"B-OTH\", \"I-OTH\"]\n", | |
"\n", | |
"processor = NERProcessor(\n", | |
" tokenizer=tokenizer, max_seq_len=128, data_dir=Path(\"../data/conll03-en\"), delimiter=\" \", metric=\"seq_f1\", label_list=ner_labels\n", | |
")\n", | |
"\n", | |
"# 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them and calculates a few descriptive statistics of our datasets\n", | |
"data_silo = DataSilo(processor=processor, batch_size=batch_size)\n" | |
], | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:11:30 - INFO - farm.utils - device: cuda n_gpu: 1, distributed training: False, automatic mixed precision training: None\n", | |
"04/15/2020 20:11:30 - INFO - farm.modeling.tokenization - Loading tokenizer of type 'BertTokenizer'\n", | |
"04/15/2020 20:11:30 - INFO - filelock - Lock 139915751362176 acquired on /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1.lock\n", | |
"04/15/2020 20:11:30 - INFO - transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp0l3ymdpt\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "1b39397a3b7c4f46af0f78c302af8b73", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(IntProgress(value=0, description='Downloading', max=213450, style=ProgressStyle(description_wid…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:11:30 - INFO - transformers.file_utils - storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt in cache at /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n", | |
"04/15/2020 20:11:30 - INFO - transformers.file_utils - creating metadata file for /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n", | |
"04/15/2020 20:11:30 - INFO - filelock - Lock 139915751362176 released on /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1.lock\n", | |
"04/15/2020 20:11:30 - INFO - transformers.tokenization_utils - loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /root/.cache/torch/transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:11:31 - INFO - farm.data_handler.data_silo - \n", | |
"Loading data into the data silo ... \n", | |
" ______\n", | |
" |o | !\n", | |
" __ |:`_|---'-.\n", | |
" |__|______.-/ _ \\-----.| \n", | |
" (o)(o)------'\\ _ / ( ) \n", | |
" \n", | |
"04/15/2020 20:11:31 - INFO - farm.data_handler.data_silo - Loading train set from: ../data/conll03-en/train.txt \n", | |
"04/15/2020 20:11:31 - INFO - farm.data_handler.utils - Couldn't find ../data/conll03-en/train.txt locally. Trying to download ...\n", | |
"04/15/2020 20:11:31 - INFO - farm.data_handler.utils - downloading and extracting file conll03-en to dir /content/data\n", | |
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - Got ya 1 parallel workers to convert 14041 dictionaries to pytorch datasets (chunksize = 2000)...\n", | |
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - 0 \n", | |
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - /w\\\n", | |
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - /'\\\n", | |
"04/15/2020 20:11:32 - INFO - farm.data_handler.data_silo - \n", | |
"Preprocessing Dataset ../data/conll03-en/train.txt: 0%| | 0/14041 [00:00<?, ? Dicts/s]04/15/2020 20:11:34 - INFO - farm.data_handler.processor - *** Show 2 random examples ***\n", | |
"04/15/2020 20:11:34 - INFO - farm.data_handler.processor - \n", | |
"\n", | |
" .--. _____ _ \n", | |
" .'_\\/_'. / ____| | | \n", | |
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n", | |
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n", | |
" || /\\ ____) | (_| | | | | | | |_) | | __/\n", | |
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n", | |
" (/\\||/ |_| \n", | |
"______\\||/___________________________________________ \n", | |
"\n", | |
"ID: train-1283-0\n", | |
"Clear Text: \n", | |
" \ttext: ALKHAN-YURT , Russia 1996-08-22\n", | |
" \tner_label: ['B-LOC', 'O', 'B-LOC', 'O']\n", | |
"Tokenized: \n", | |
" \ttokens: ['AL', '##K', '##HA', '##N', '-', 'Y', '##UR', '##T', ',', 'Russia', '1996', '-', '08', '-', '22']\n", | |
" \toffsets: [0, 2, 3, 5, 6, 7, 8, 10, 12, 14, 21, 25, 26, 28, 29]\n", | |
" \tstart_of_word: [True, False, False, False, False, False, False, False, True, True, True, False, False, False, False]\n", | |
"Features: \n", | |
" \tinput_ids: [101, 18589, 2428, 11612, 2249, 118, 162, 19556, 1942, 117, 2733, 1820, 118, 4775, 118, 1659, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tinitial_mask: [0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tner_label_ids: [1, 9, 1, 1, 1, 1, 1, 1, 1, 2, 9, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
"_____________________________________________________\n", | |
"04/15/2020 20:11:34 - INFO - farm.data_handler.processor - \n", | |
"\n", | |
" .--. _____ _ \n", | |
" .'_\\/_'. / ____| | | \n", | |
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n", | |
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n", | |
" || /\\ ____) | (_| | | | | | | |_) | | __/\n", | |
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n", | |
" (/\\||/ |_| \n", | |
"______\\||/___________________________________________ \n", | |
"\n", | |
"ID: train-252-0\n", | |
"Clear Text: \n", | |
" \ttext: Software Revenue 2,383 1,558 1,086 1,074\n", | |
" \tner_label: ['O', 'O', 'O', 'O', 'O', 'O']\n", | |
"Tokenized: \n", | |
" \ttokens: ['Software', 'Revenue', '2', ',', '38', '##3', '1', ',', '55', '##8', '1', ',', '08', '##6', '1', ',', '07', '##4']\n", | |
" \toffsets: [0, 9, 17, 18, 19, 21, 23, 24, 25, 27, 29, 30, 31, 33, 35, 36, 37, 39]\n", | |
" \tstart_of_word: [True, True, True, False, False, False, True, False, False, False, True, False, False, False, True, False, False, False]\n", | |
"Features: \n", | |
" \tinput_ids: [101, 10331, 16944, 123, 117, 3383, 1495, 122, 117, 3731, 1604, 122, 117, 4775, 1545, 122, 117, 5004, 1527, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tinitial_mask: [0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tner_label_ids: [1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
"_____________________________________________________\n", | |
"Preprocessing Dataset ../data/conll03-en/train.txt: 100%|██████████| 14041/14041 [00:13<00:00, 1058.31 Dicts/s]\n", | |
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - Loading dev set from: ../data/conll03-en/dev.txt\n", | |
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - Got ya 1 parallel workers to convert 3250 dictionaries to pytorch datasets (chunksize = 650)...\n", | |
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - 0 \n", | |
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - /w\\\n", | |
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - / \\\n", | |
"04/15/2020 20:11:45 - INFO - farm.data_handler.data_silo - \n", | |
"Preprocessing Dataset ../data/conll03-en/dev.txt: 0%| | 0/3250 [00:00<?, ? Dicts/s]04/15/2020 20:11:46 - INFO - farm.data_handler.processor - *** Show 2 random examples ***\n", | |
"04/15/2020 20:11:46 - INFO - farm.data_handler.processor - \n", | |
"\n", | |
" .--. _____ _ \n", | |
" .'_\\/_'. / ____| | | \n", | |
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n", | |
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n", | |
" || /\\ ____) | (_| | | | | | | |_) | | __/\n", | |
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n", | |
" (/\\||/ |_| \n", | |
"______\\||/___________________________________________ \n", | |
"\n", | |
"ID: train-259-0\n", | |
"Clear Text: \n", | |
" \ttext: HOUSTON AT PITTSBURGH\n", | |
" \tner_label: ['B-ORG', 'O', 'B-LOC']\n", | |
"Tokenized: \n", | |
" \ttokens: ['H', '##O', '##US', '##TO', '##N', 'AT', 'P', '##IT', '##TS', '##B', '##UR', '##G', '##H']\n", | |
" \toffsets: [0, 1, 2, 4, 6, 8, 11, 12, 14, 16, 17, 19, 20]\n", | |
" \tstart_of_word: [True, False, False, False, False, True, True, False, False, False, False, False, False]\n", | |
"Features: \n", | |
" \tinput_ids: [101, 145, 2346, 13329, 18082, 2249, 13020, 153, 12150, 11365, 2064, 19556, 2349, 3048, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tinitial_mask: [0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tner_label_ids: [1, 7, 1, 1, 1, 1, 2, 9, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
"_____________________________________________________\n", | |
"04/15/2020 20:11:46 - INFO - farm.data_handler.processor - \n", | |
"\n", | |
" .--. _____ _ \n", | |
" .'_\\/_'. / ____| | | \n", | |
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n", | |
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n", | |
" || /\\ ____) | (_| | | | | | | |_) | | __/\n", | |
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n", | |
" (/\\||/ |_| \n", | |
"______\\||/___________________________________________ \n", | |
"\n", | |
"ID: train-92-0\n", | |
"Clear Text: \n", | |
" \ttext: Red Star ( Yugoslavia ) beat Dinamo ( Russia ) 92-90 ( halftime\n", | |
" \tner_label: ['B-ORG', 'I-ORG', 'O', 'B-LOC', 'O', 'O', 'B-ORG', 'O', 'B-LOC', 'O', 'O', 'O', 'O']\n", | |
"Tokenized: \n", | |
" \ttokens: ['Red', 'Star', '(', 'Yugoslavia', ')', 'beat', 'Dinamo', '(', 'Russia', ')', '92', '-', '90', '(', 'halftime']\n", | |
" \toffsets: [0, 4, 9, 11, 22, 24, 29, 36, 38, 45, 47, 49, 50, 53, 55]\n", | |
" \tstart_of_word: [True, True, True, True, True, True, True, True, True, True, True, False, False, True, True]\n", | |
"Features: \n", | |
" \tinput_ids: [101, 2156, 2537, 113, 8575, 114, 3222, 24780, 113, 2733, 114, 5556, 118, 3078, 113, 26077, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tinitial_mask: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tner_label_ids: [1, 7, 8, 2, 9, 2, 2, 7, 2, 9, 2, 2, 1, 1, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
"_____________________________________________________\n", | |
"Preprocessing Dataset ../data/conll03-en/dev.txt: 100%|██████████| 3250/3250 [00:03<00:00, 1028.94 Dicts/s]\n", | |
"04/15/2020 20:11:48 - INFO - farm.data_handler.data_silo - Loading test set from: ../data/conll03-en/test.txt\n", | |
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - Got ya 1 parallel workers to convert 3453 dictionaries to pytorch datasets (chunksize = 691)...\n", | |
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - 0 \n", | |
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - /w\\\n", | |
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - /'\\\n", | |
"04/15/2020 20:11:49 - INFO - farm.data_handler.data_silo - \n", | |
"Preprocessing Dataset ../data/conll03-en/test.txt: 0%| | 0/3453 [00:00<?, ? Dicts/s]04/15/2020 20:11:49 - INFO - farm.data_handler.processor - *** Show 2 random examples ***\n", | |
"04/15/2020 20:11:49 - INFO - farm.data_handler.processor - \n", | |
"\n", | |
" .--. _____ _ \n", | |
" .'_\\/_'. / ____| | | \n", | |
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n", | |
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n", | |
" || /\\ ____) | (_| | | | | | | |_) | | __/\n", | |
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n", | |
" (/\\||/ |_| \n", | |
"______\\||/___________________________________________ \n", | |
"\n", | |
"ID: train-573-0\n", | |
"Clear Text: \n", | |
" \ttext: Feyenoord 17 11 3 3 29 20 36\n", | |
" \tner_label: ['B-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n", | |
"Tokenized: \n", | |
" \ttokens: ['Fe', '##ye', '##no', '##ord', '17', '11', '3', '3', '29', '20', '36']\n", | |
" \toffsets: [0, 2, 4, 6, 10, 13, 16, 18, 20, 23, 26]\n", | |
" \tstart_of_word: [True, False, False, False, True, True, True, True, True, True, True]\n", | |
"Features: \n", | |
" \tinput_ids: [101, 11907, 4980, 2728, 6944, 1542, 1429, 124, 124, 1853, 1406, 3164, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tinitial_mask: [0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tner_label_ids: [1, 7, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
"_____________________________________________________\n", | |
"04/15/2020 20:11:49 - INFO - farm.data_handler.processor - \n", | |
"\n", | |
" .--. _____ _ \n", | |
" .'_\\/_'. / ____| | | \n", | |
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n", | |
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n", | |
" || /\\ ____) | (_| | | | | | | |_) | | __/\n", | |
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n", | |
" (/\\||/ |_| \n", | |
"______\\||/___________________________________________ \n", | |
"\n", | |
"ID: train-601-0\n", | |
"Clear Text: \n", | |
" \ttext: Bayern Munich 16 9 6 1 26 14 33\n", | |
" \tner_label: ['B-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O']\n", | |
"Tokenized: \n", | |
" \ttokens: ['Bayern', 'Munich', '16', '9', '6', '1', '26', '14', '33']\n", | |
" \toffsets: [0, 7, 14, 17, 19, 21, 23, 26, 29]\n", | |
" \tstart_of_word: [True, True, True, True, True, True, True, True, True]\n", | |
"Features: \n", | |
" \tinput_ids: [101, 23517, 6947, 1479, 130, 127, 122, 1744, 1489, 3081, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tinitial_mask: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tner_label_ids: [1, 7, 8, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
"_____________________________________________________\n", | |
"Preprocessing Dataset ../data/conll03-en/test.txt: 100%|██████████| 3453/3453 [00:02<00:00, 1183.91 Dicts/s]\n", | |
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Examples in train: 14041\n", | |
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Examples in dev : 3250\n", | |
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Examples in test : 3453\n", | |
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - \n", | |
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Longest sequence length observed after clipping: 128\n", | |
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Average sequence length after clipping: 21.411010611779787\n", | |
"04/15/2020 20:11:52 - INFO - farm.data_handler.data_silo - Proportion clipped: 7.121999857560003e-05\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qJEnJbStibuG", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 448, | |
"referenced_widgets": [ | |
"aacfc007c1f5457c8f54b30ec66863b1", | |
"40ff435523b04903aad8e65507a662d3", | |
"2ce03980b8eb47e3bc4def1a25f9f150", | |
"3401c4064ff449ad9b7a785f85629ec1", | |
"0b3502a0bc304f51a13df06516697210", | |
"bc38e1ba9e1e47d0846c62ab55042118", | |
"58a7a998b3bd48b8b273fca4a7d628aa", | |
"de3726de50c84a459b04df3a2e2710bd", | |
"897ecef9f62e46449dc5da3677f31a51", | |
"30ce00d9d7254e42a75f6eb1bd3aea56", | |
"6e04f43cc22143a49d165fa78cdebf1d", | |
"34078f982a4a49fb87d858d6b926d26c", | |
"ee65e576985f4b9f975805df04c2e56a", | |
"13d9851cad514f188a2281b5f81827e0", | |
"ef25ad38764f424284acc290ab4208e1", | |
"17c4e90625074475a3cba8ad8f50a527" | |
] | |
}, | |
"outputId": "0bef4321-55d7-4c6c-fb87-d9305be4268b" | |
}, | |
"source": [ | |
"\n", | |
"# 4. Create an AdaptiveModel\n", | |
"# a) which consists of a pretrained language model as a basis\n", | |
"language_model = LanguageModel.load(lang_model)\n", | |
"# b) and a prediction head on top that is suited for our task => NER\n", | |
"prediction_head = TokenClassificationHead(num_labels=len(ner_labels))\n", | |
"\n", | |
"model = AdaptiveModel(\n", | |
" language_model=language_model,\n", | |
" prediction_heads=[prediction_head],\n", | |
" embeds_dropout_prob=0.1,\n", | |
" lm_output_types=[\"per_token\"],\n", | |
" device=device,\n", | |
")\n", | |
"\n", | |
"# 5. Create an optimizer\n", | |
"model, optimizer, lr_schedule = initialize_optimizer(\n", | |
" model=model,\n", | |
" learning_rate=5e-5,\n", | |
" schedule_opts={\"name\": \"LinearWarmup\", \"warmup_proportion\": warmup_proportion},\n", | |
" n_batches=len(data_silo.loaders[\"train\"]),\n", | |
" n_epochs=n_epochs,\n", | |
" device=device,\n", | |
")\n", | |
"\n", | |
"# 6. Feed everything to the Trainer, which takes care of growing our model into a powerful plant and evaluates it from time to time\n", | |
"trainer = Trainer(\n", | |
" model=model,\n", | |
" optimizer=optimizer,\n", | |
" data_silo=data_silo,\n", | |
" epochs=n_epochs,\n", | |
" n_gpu=n_gpu,\n", | |
" lr_schedule=lr_schedule,\n", | |
" evaluate_every=evaluate_every,\n", | |
" device=device,\n", | |
")\n" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:12:49 - INFO - filelock - Lock 139915603172432 acquired on /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e.lock\n", | |
"04/15/2020 20:12:49 - INFO - transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp0axpxuxr\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "aacfc007c1f5457c8f54b30ec66863b1", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(IntProgress(value=0, description='Downloading', max=361, style=ProgressStyle(description_width=…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:12:49 - INFO - transformers.file_utils - storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json in cache at /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e\n", | |
"04/15/2020 20:12:49 - INFO - transformers.file_utils - creating metadata file for /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e\n", | |
"04/15/2020 20:12:49 - INFO - filelock - Lock 139915603172432 released on /root/.cache/torch/transformers/b945b69218e98b3e2c95acf911789741307dec43c698d35fad11c1ae28bda352.3d5adf10d3445c36ce131f4c6416aa62e9b58e1af56b97664773f4858a46286e.lock\n", | |
"04/15/2020 20:12:49 - INFO - filelock - Lock 139915764977336 acquired on /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2.lock\n", | |
"04/15/2020 20:12:49 - INFO - transformers.file_utils - https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmprdof9097\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "897ecef9f62e46449dc5da3677f31a51", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
"HBox(children=(IntProgress(value=0, description='Downloading', max=435779157, style=ProgressStyle(description_…" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
} | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:12:56 - INFO - transformers.file_utils - storing https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin in cache at /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n", | |
"04/15/2020 20:12:56 - INFO - transformers.file_utils - creating metadata file for /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n", | |
"04/15/2020 20:12:56 - INFO - filelock - Lock 139915764977336 released on /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2.lock\n", | |
"04/15/2020 20:12:56 - INFO - transformers.modeling_utils - loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin from cache at /root/.cache/torch/transformers/35d8b9d36faaf46728a0192d82bf7d00137490cd6074e8500778afed552a67e5.3fadbea36527ae472139fe84cddaa65454d7429f12d543d80bfc3ad70de55ac2\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:12:59 - WARNING - farm.modeling.language_model - Could not automatically detect from language model name what language it is. \n", | |
"\t We guess it's an *ENGLISH* model ... \n", | |
"\t If not: Init the language model by supplying the 'language' param.\n", | |
"04/15/2020 20:12:59 - INFO - farm.modeling.prediction_head - Prediction head initialized with size [768, 13]\n", | |
"04/15/2020 20:13:16 - INFO - farm.modeling.optimization - Loading optimizer `TransformersAdamW`: '{'correct_bias': False, 'weight_decay': 0.01, 'lr': 5e-05}'\n", | |
"04/15/2020 20:13:16 - INFO - farm.modeling.optimization - Using scheduler 'get_linear_schedule_with_warmup'\n", | |
"04/15/2020 20:13:17 - INFO - farm.modeling.optimization - Loading schedule `get_linear_schedule_with_warmup`: '{'num_training_steps': 440, 'num_warmup_steps': 176}'\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "LdUzNWzkhL9F", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 1000 | |
}, | |
"outputId": "9d9420bb-996b-4804-9df1-131fabc0466b" | |
}, | |
"source": [ | |
"\n", | |
"# 7. Let it grow\n", | |
"trainer.train()\n" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:13:19 - INFO - farm.train - \n", | |
" \n", | |
"\n", | |
" &&& && & && _____ _ \n", | |
" && &\\/&\\|& ()|/ @, && / ____| (_) \n", | |
" &\\/(/&/&||/& /_/)_&/_& | | __ _ __ _____ ___ _ __ __ _ \n", | |
" &() &\\/&|()|/&\\/ '%\" & () | | |_ | '__/ _ \\ \\ /\\ / / | '_ \\ / _` |\n", | |
" &_\\_&&_\\ |& |&&/&__%_/_& && | |__| | | | (_) \\ V V /| | | | | (_| |\n", | |
"&& && & &| &| /& & % ()& /&& \\_____|_| \\___/ \\_/\\_/ |_|_| |_|\\__, |\n", | |
" ()&_---()&\\&\\|&&-&&--%---()~ __/ |\n", | |
" && \\||| |___/\n", | |
" |||\n", | |
" |||\n", | |
" |||\n", | |
" , -=-~ .-^- _\n", | |
" `\n", | |
"\n", | |
"Train epoch 0/2 (Cur. train loss: 0.0387): 100%|██████████| 220/220 [02:46<00:00, 1.32it/s]\n", | |
"Train epoch 1/2 (Cur. train loss: 0.0323): 82%|████████▏ | 180/220 [02:15<00:28, 1.39it/s]\n", | |
"Evaluating: 0%| | 0/51 [00:00<?, ?it/s]\u001b[A\n", | |
"Evaluating: 100%|██████████| 51/51 [00:18<00:00, 2.71it/s]\n", | |
"04/15/2020 20:18:41 - INFO - farm.eval - \n", | |
"\n", | |
"\\\\|// \\\\|// \\\\|// \\\\|// \\\\|//\n", | |
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
"***************************************************\n", | |
"***** EVALUATION | DEV SET | AFTER 400 BATCHES *****\n", | |
"***************************************************\n", | |
"\\\\|// \\\\|// \\\\|// \\\\|// \\\\|//\n", | |
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
"\n", | |
"04/15/2020 20:18:41 - INFO - farm.eval - \n", | |
" _________ ner _________\n", | |
"04/15/2020 20:18:42 - INFO - farm.eval - loss: 0.6635722540341891\n", | |
"04/15/2020 20:18:42 - INFO - farm.eval - task_name: ner\n", | |
"04/15/2020 20:18:42 - INFO - farm.eval - seq_f1: 0.939728779507785\n", | |
"04/15/2020 20:18:42 - INFO - farm.eval - report: \n", | |
" precision recall f1-score support\n", | |
"\n", | |
" LOC 0.96 0.96 0.96 1837\n", | |
" MISC 0.89 0.88 0.89 922\n", | |
" PER 0.97 0.97 0.97 1836\n", | |
" ORG 0.90 0.93 0.91 1341\n", | |
"\n", | |
"micro avg 0.93 0.95 0.94 5936\n", | |
"macro avg 0.94 0.95 0.94 5936\n", | |
"\n", | |
"Train epoch 1/2 (Cur. train loss: 0.0081): 100%|██████████| 220/220 [03:05<00:00, 1.19it/s]\n", | |
"Evaluating: 100%|██████████| 54/54 [00:19<00:00, 2.71it/s]\n", | |
"04/15/2020 20:19:31 - INFO - farm.eval - \n", | |
"\n", | |
"\\\\|// \\\\|// \\\\|// \\\\|// \\\\|//\n", | |
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
"***************************************************\n", | |
"***** EVALUATION | TEST SET | AFTER 440 BATCHES *****\n", | |
"***************************************************\n", | |
"\\\\|// \\\\|// \\\\|// \\\\|// \\\\|//\n", | |
"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
"\n", | |
"04/15/2020 20:19:31 - INFO - farm.eval - \n", | |
" _________ ner _________\n", | |
"04/15/2020 20:19:32 - INFO - farm.eval - loss: 1.3053936329987164\n", | |
"04/15/2020 20:19:32 - INFO - farm.eval - task_name: ner\n", | |
"04/15/2020 20:19:32 - INFO - farm.eval - seq_f1: 0.9011739968459787\n", | |
"04/15/2020 20:19:32 - INFO - farm.eval - report: \n", | |
" precision recall f1-score support\n", | |
"\n", | |
" ORG 0.86 0.90 0.88 1661\n", | |
" PER 0.96 0.95 0.95 1615\n", | |
" LOC 0.93 0.92 0.93 1666\n", | |
" MISC 0.75 0.82 0.79 702\n", | |
"\n", | |
"micro avg 0.89 0.91 0.90 5644\n", | |
"macro avg 0.90 0.91 0.90 5644\n", | |
"\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"AdaptiveModel(\n", | |
" (language_model): Bert(\n", | |
" (model): BertModel(\n", | |
" (embeddings): BertEmbeddings(\n", | |
" (word_embeddings): Embedding(28996, 768, padding_idx=0)\n", | |
" (position_embeddings): Embedding(512, 768)\n", | |
" (token_type_embeddings): Embedding(2, 768)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (encoder): BertEncoder(\n", | |
" (layer): ModuleList(\n", | |
" (0): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (1): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (2): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (3): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (4): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (5): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (6): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (7): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (8): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (9): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (10): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (11): BertLayer(\n", | |
" (attention): BertAttention(\n", | |
" (self): BertSelfAttention(\n", | |
" (query): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (key): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (value): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" (output): BertSelfOutput(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" (intermediate): BertIntermediate(\n", | |
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n", | |
" )\n", | |
" (output): BertOutput(\n", | |
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n", | |
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
" )\n", | |
" )\n", | |
" )\n", | |
" )\n", | |
" (pooler): BertPooler(\n", | |
" (dense): Linear(in_features=768, out_features=768, bias=True)\n", | |
" (activation): Tanh()\n", | |
" )\n", | |
" )\n", | |
" )\n", | |
" (prediction_heads): ModuleList(\n", | |
" (0): TokenClassificationHead(\n", | |
" (feed_forward): FeedForwardBlock(\n", | |
" (feed_forward): Sequential(\n", | |
" (0): Linear(in_features=768, out_features=13, bias=True)\n", | |
" )\n", | |
" )\n", | |
" (loss_fct): CrossEntropyLoss()\n", | |
" )\n", | |
" )\n", | |
" (dropout): Dropout(p=0.1, inplace=False)\n", | |
")" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "sQTxu3_1i_Tp", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# 8. Hooray! You have a model. Store it:\n", | |
"# The trained model and the processor are both written into the same directory.\n", | |
"save_dir = \"saved_models_bert-en-ner3\"\n", | |
"for saveable in (model, processor):  # model first, then processor\n", | |
"    saveable.save(save_dir)\n" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "EvBGMJK3li9k", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 1000 | |
}, | |
"outputId": "6e2fccde-a6d3-495f-c46c-4213bd3c040f" | |
}, | |
"source": [ | |
"# 9. Load it & harvest your fruits (Inference)\n", | |
"# The loaded Inferencer gets its own name: rebinding `model` here would replace the\n", | |
"# trained model in the kernel, so re-running the save cell would act on the wrong object.\n", | |
"inferencer = Inferencer.load(save_dir)\n", | |
"\n", | |
"# Each input is a dict with a single \"text\" entry.\n", | |
"basic_texts = [\n", | |
" {\"text\": \"I love it when I see Obama in Japan.\"},\n", | |
" {\"text\": \"Not limiting global temperature rise to 1.5 degrees would mean trillions of dollars in economic losses, heat extremes in all inhabited parts of the planet, die-off of large parts of the Amazon rainforest, and millions of climate refugees.\"},\n", | |
" {\"text\": \"August 22, 2019 Amazon’s new plastic packaging has caused outrage among customers and environmental activists who’ve branded it major step backwards.\"},\n", | |
"]\n", | |
"\n", | |
"result = inferencer.inference_from_dicts(dicts=basic_texts)\n", | |
"print(result)\n" | |
], | |
"execution_count": 13, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"04/15/2020 20:20:12 - INFO - farm.utils - device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None\n", | |
"04/15/2020 20:20:12 - INFO - transformers.modeling_utils - loading weights file saved_models_bert-en-ner3/language_model.bin from cache at saved_models_bert-en-ner3/language_model.bin\n", | |
"04/15/2020 20:20:14 - INFO - farm.modeling.adaptive_model - Found files for loading 1 prediction heads\n", | |
"04/15/2020 20:20:14 - WARNING - farm.modeling.prediction_head - `layer_dims` will be deprecated in future releases\n", | |
"04/15/2020 20:20:14 - INFO - farm.modeling.prediction_head - Prediction head initialized with size [768, 13]\n", | |
"04/15/2020 20:20:14 - INFO - farm.modeling.prediction_head - Loading prediction head from saved_models_bert-en-ner3/prediction_head_0.bin\n", | |
"04/15/2020 20:20:14 - WARNING - farm.modeling.adaptive_model - ML logging didn't work: INVALID_PARAMETER_VALUE: Changing param value is not allowed. Param with key='lm_name' was already logged with value='bert-base-cased' for run ID='7c43c4979601470298f408ded7556409. Attempted logging new value 'saved_models_bert-en-ner3'.\n", | |
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - Model name 'saved_models_bert-en-ner3' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc, bert-base-german-dbmdz-cased, bert-base-german-dbmdz-uncased, bert-base-finnish-cased-v1, bert-base-finnish-uncased-v1, bert-base-dutch-cased). Assuming 'saved_models_bert-en-ner3' is a path, a model identifier, or url to a directory containing tokenizer files.\n", | |
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - Didn't find file saved_models_bert-en-ner3/added_tokens.json. We won't load it.\n", | |
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - loading file saved_models_bert-en-ner3/vocab.txt\n", | |
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - loading file None\n", | |
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - loading file saved_models_bert-en-ner3/special_tokens_map.json\n", | |
"04/15/2020 20:20:14 - INFO - transformers.tokenization_utils - loading file saved_models_bert-en-ner3/tokenizer_config.json\n", | |
"04/15/2020 20:20:15 - INFO - farm.data_handler.processor - Initialized processor without tasks. Supply `metric` and `label_list` to the constructor for using the default task or add a custom task later via processor.add_task()\n", | |
"04/15/2020 20:20:15 - INFO - farm.utils - device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None\n", | |
"04/15/2020 20:20:15 - INFO - farm.infer - Got ya 1 parallel workers to do inference on dicts (chunksize = 4)...\n", | |
"04/15/2020 20:20:15 - INFO - farm.infer - 0 \n", | |
"04/15/2020 20:20:15 - INFO - farm.infer - /w\\\n", | |
"04/15/2020 20:20:15 - INFO - farm.infer - /'\\\n", | |
"04/15/2020 20:20:15 - INFO - farm.infer - \n", | |
"04/15/2020 20:20:15 - WARNING - farm.data_handler.input_features - [Task: ner] Could not convert labels to ids via label_list!\n", | |
"If your are running in *inference* mode: Don't worry!\n", | |
"If you are running in *training* mode: Verify you are supplying a proper label list to your processor and check that labels in input data are correct.\n", | |
"04/15/2020 20:20:15 - WARNING - farm.data_handler.input_features - [Task: ner] Could not convert labels to ids via label_list!\n", | |
"If your are running in *inference* mode: Don't worry!\n", | |
"If you are running in *training* mode: Verify you are supplying a proper label list to your processor and check that labels in input data are correct.\n", | |
"04/15/2020 20:20:15 - WARNING - farm.data_handler.input_features - [Task: ner] Could not convert labels to ids via label_list!\n", | |
"If your are running in *inference* mode: Don't worry!\n", | |
"If you are running in *training* mode: Verify you are supplying a proper label list to your processor and check that labels in input data are correct.\n", | |
"04/15/2020 20:20:15 - INFO - farm.data_handler.processor - *** Show 2 random examples ***\n", | |
"04/15/2020 20:20:15 - INFO - farm.data_handler.processor - \n", | |
"\n", | |
" .--. _____ _ \n", | |
" .'_\\/_'. / ____| | | \n", | |
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n", | |
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n", | |
" || /\\ ____) | (_| | | | | | | |_) | | __/\n", | |
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n", | |
" (/\\||/ |_| \n", | |
"______\\||/___________________________________________ \n", | |
"\n", | |
"ID: train-2-0\n", | |
"Clear Text: \n", | |
" \ttext: August 22, 2019 Amazon’s new plastic packaging has caused outrage among customers and environmental activists who’ve branded it major step backwards.\n", | |
"Tokenized: \n", | |
" \ttokens: ['August', '22', ',', '2019', 'Amazon', '’', 's', 'new', 'plastic', 'packaging', 'has', 'caused', 'outrage', 'among', 'customers', 'and', 'environmental', 'activists', 'who', '’', 've', 'branded', 'it', 'major', 'step', 'backwards', '.']\n", | |
" \toffsets: [0, 7, 9, 11, 16, 22, 23, 25, 29, 37, 47, 51, 58, 66, 72, 82, 86, 100, 110, 113, 114, 117, 125, 128, 134, 139, 148]\n", | |
" \tstart_of_word: [True, True, False, True, True, False, False, True, True, True, True, True, True, True, True, True, True, True, True, False, False, True, True, True, True, True, False]\n", | |
"Features: \n", | |
" \tinput_ids: [101, 1360, 1659, 117, 10351, 9786, 787, 188, 1207, 5828, 17019, 1144, 2416, 22052, 1621, 5793, 1105, 4801, 10254, 1150, 787, 1396, 11450, 1122, 1558, 2585, 11316, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tinitial_mask: [0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
"_____________________________________________________\n", | |
"04/15/2020 20:20:15 - INFO - farm.data_handler.processor - \n", | |
"\n", | |
" .--. _____ _ \n", | |
" .'_\\/_'. / ____| | | \n", | |
" '. /\\ .' | (___ __ _ _ __ ___ _ __ | | ___ \n", | |
" \"||\" \\___ \\ / _` | '_ ` _ \\| '_ \\| |/ _ \\ \n", | |
" || /\\ ____) | (_| | | | | | | |_) | | __/\n", | |
" /\\ ||//\\) |_____/ \\__,_|_| |_| |_| .__/|_|\\___|\n", | |
" (/\\||/ |_| \n", | |
"______\\||/___________________________________________ \n", | |
"\n", | |
"ID: train-0-0\n", | |
"Clear Text: \n", | |
" \ttext: I love it when I see Obama in Japan.\n", | |
"Tokenized: \n", | |
" \ttokens: ['I', 'love', 'it', 'when', 'I', 'see', 'Obama', 'in', 'Japan', '.']\n", | |
" \toffsets: [0, 2, 7, 10, 15, 17, 21, 27, 30, 35]\n", | |
" \tstart_of_word: [True, True, True, True, True, True, True, True, True, False]\n", | |
"Features: \n", | |
" \tinput_ids: [101, 146, 1567, 1122, 1165, 146, 1267, 7661, 1107, 1999, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tpadding_mask: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tsegment_ids: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
" \tinitial_mask: [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", | |
"_____________________________________________________\n" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[{'task': 'ner', 'predictions': [{'start': 21, 'end': 26, 'context': 'Obama', 'label': 'PER', 'probability': 0.99587303}, {'start': 30, 'end': 36, 'context': 'Japan.', 'label': 'LOC', 'probability': 0.9992192}, {'start': 186, 'end': 192, 'context': 'Amazon', 'label': 'LOC', 'probability': 0.99963367}, {'start': 16, 'end': 24, 'context': 'Amazon’s', 'label': 'ORG', 'probability': 0.99936336}]}]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "6kiNl6BZlZwt", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Inspect the predictions: a bare last expression uses the notebook's rich display instead of print().\n", | |
"result" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "dGFLE2z9l6qy", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment