Skip to content

Instantly share code, notes, and snippets.

@emillykkejensen
Forked from avidale/create_rut5-base.ipynb
Last active January 6, 2022 10:51
Show Gist options
  • Save emillykkejensen/8bf1b323495efc7252dee966e6bc1b5c to your computer and use it in GitHub Desktop.
Save emillykkejensen/8bf1b323495efc7252dee966e6bc1b5c to your computer and use it in GitHub Desktop.
create_daT5-base.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "create_daT5-base.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"7cb4b97fba8d401f99b0e99cc984a7d7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_7aa056e172684bf9a348740630964c91",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_622277c193f44cd196228edf9e00d87d",
"IPY_MODEL_41ee529a0659492aafc349b1dd17abac",
"IPY_MODEL_470513443ed4419b9ee002ef7cdea909"
]
}
},
"7aa056e172684bf9a348740630964c91": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"622277c193f44cd196228edf9e00d87d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_41d8060df9824ec1ab8db61963f58048",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "Downloading: 100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_70f57885f83e46d794627d30b5302e2c"
}
},
"41ee529a0659492aafc349b1dd17abac": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_34cf4b8d5d1b4cd998f1a276b21076ec",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 4309802,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 4309802,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_1e6bc4df33904c8b83ebd038b84a80de"
}
},
"470513443ed4419b9ee002ef7cdea909": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_77048a32d8474fef9ec4e759bf0380fb",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 4.11M/4.11M [00:00<00:00, 5.50MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_d3ae8eaf9c474504a3b6fc90cfe47768"
}
},
"41d8060df9824ec1ab8db61963f58048": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"70f57885f83e46d794627d30b5302e2c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"34cf4b8d5d1b4cd998f1a276b21076ec": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"1e6bc4df33904c8b83ebd038b84a80de": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"77048a32d8474fef9ec4e759bf0380fb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"d3ae8eaf9c474504a3b6fc90cfe47768": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"ca64ec309903401e8cb5379bcffd24e5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_4516aa06937c490f9070c6fcd41ba532",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_72c873142fcc4738951c472c84dc8705",
"IPY_MODEL_c83ec79e3a314c6a97f96b29208e3343",
"IPY_MODEL_ff924bf1d8894f2086323c6d854e3473"
]
}
},
"4516aa06937c490f9070c6fcd41ba532": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"72c873142fcc4738951c472c84dc8705": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_6c98b754333f4d71b7fbd0bfc90426fe",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "Downloading: 100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_7da98f15686f4f35b86c80e6ea46f84b"
}
},
"c83ec79e3a314c6a97f96b29208e3343": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_6f572a00d58c43858a54fef8fe202fa5",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 65,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 65,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_f64c01800610460e94b4069b0e5a4fda"
}
},
"ff924bf1d8894f2086323c6d854e3473": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_6d9551b36d60465ca16ac2386de0c9bb",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 65.0/65.0 [00:00<00:00, 1.70kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_db644517049642218033513112fc5e41"
}
},
"6c98b754333f4d71b7fbd0bfc90426fe": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"7da98f15686f4f35b86c80e6ea46f84b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"6f572a00d58c43858a54fef8fe202fa5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"f64c01800610460e94b4069b0e5a4fda": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"6d9551b36d60465ca16ac2386de0c9bb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"db644517049642218033513112fc5e41": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"b13c991949c64469bc1b144c59fa76e2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_6cab840322e043ba8bdeb271aaa2d451",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_e4c5fedab2e6402a947cb1df9a7d3f9e",
"IPY_MODEL_7348975b164b4e4f8c898e678554d1c2",
"IPY_MODEL_520ff29c681f4c2dafa8973a373f7094"
]
}
},
"6cab840322e043ba8bdeb271aaa2d451": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"e4c5fedab2e6402a947cb1df9a7d3f9e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_068be5b78bca41a887ceb4fe58710ea0",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "Downloading: 100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_d3ce481f18ee4c0eb73e6b17c9408e87"
}
},
"7348975b164b4e4f8c898e678554d1c2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_23ad3b4277da42faa8ebced67e9beb94",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 376,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 376,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_f11fe8adb0384c41a0d2c40195a93b58"
}
},
"520ff29c681f4c2dafa8973a373f7094": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_927feb7db9134e169cfc8942758df769",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 376/376 [00:00<00:00, 8.92kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_24b93f86236f4ddaa235f8eca35901ed"
}
},
"068be5b78bca41a887ceb4fe58710ea0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"d3ce481f18ee4c0eb73e6b17c9408e87": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"23ad3b4277da42faa8ebced67e9beb94": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"f11fe8adb0384c41a0d2c40195a93b58": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"927feb7db9134e169cfc8942758df769": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"24b93f86236f4ddaa235f8eca35901ed": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"ce2144cace494f54b55edb43bdd2cccf": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_17c8dd3c11814dddbf6ce87e360b0a53",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_610c6445a3f04c22a57db851238a4c0d",
"IPY_MODEL_8a5264644b2546f7941b04196c1375d1",
"IPY_MODEL_30b61fd6388d4bb4a03de5b0278b19a0"
]
}
},
"17c8dd3c11814dddbf6ce87e360b0a53": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"610c6445a3f04c22a57db851238a4c0d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_a3d59abacb7146e68178197909faffcd",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "Downloading: 100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_9a5bf41d78324707893a41a340090e2b"
}
},
"8a5264644b2546f7941b04196c1375d1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_16aff1f56bd04711bbe12c71a8281d68",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 702,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 702,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_c69671882bce4c5baf8cf2dcc94a0ee7"
}
},
"30b61fd6388d4bb4a03de5b0278b19a0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_e4367ada853d43cbb285ac43fe2fc701",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 702/702 [00:00<00:00, 17.3kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_d69d708e0cc4461fa3d0a7ff62481721"
}
},
"a3d59abacb7146e68178197909faffcd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"9a5bf41d78324707893a41a340090e2b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"16aff1f56bd04711bbe12c71a8281d68": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"c69671882bce4c5baf8cf2dcc94a0ee7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"e4367ada853d43cbb285ac43fe2fc701": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"d69d708e0cc4461fa3d0a7ff62481721": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"14140e544f534306b04849facd847b52": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_3baa2f6358444a7f9c92f27be194ad0e",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_e9c417c5d3d94d6da0c6f2dd3a77189e",
"IPY_MODEL_e8fc71e7b5fb4d4e9112592390d9eaf8",
"IPY_MODEL_ca772b9b99e64b6480947a555ec3a58d"
]
}
},
"3baa2f6358444a7f9c92f27be194ad0e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"e9c417c5d3d94d6da0c6f2dd3a77189e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_2602526af0524504a6a54f1eb4919b43",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "Downloading: 100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_3f751fb6e0254acfb1a362298b786cbf"
}
},
"e8fc71e7b5fb4d4e9112592390d9eaf8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_3e888848689e41919ebe311d02e88820",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 72,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 72,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_c8768c226c0743d4aff02a6f1f2828da"
}
},
"ca772b9b99e64b6480947a555ec3a58d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_4d11243ebd2945568745731e2e04005e",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 72.0/72.0 [00:00<00:00, 1.25kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_eb695f5ae02849d2bb079ffeb575c321"
}
},
"2602526af0524504a6a54f1eb4919b43": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"3f751fb6e0254acfb1a362298b786cbf": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"3e888848689e41919ebe311d02e88820": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"c8768c226c0743d4aff02a6f1f2828da": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"4d11243ebd2945568745731e2e04005e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"eb695f5ae02849d2bb079ffeb575c321": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"fef663dae07942ed85e69e6aa98b8e72": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_5cc40837012244f88d562cc6141ae9da",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_58d8f4689e6242599d19424d2c3616e5",
"IPY_MODEL_5e4cd7d00dea4e6b919cd20c27c0ece8",
"IPY_MODEL_3543976f58f145ab9cc694cd0b73e1a0"
]
}
},
"5cc40837012244f88d562cc6141ae9da": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"58d8f4689e6242599d19424d2c3616e5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_388e2e6ad1d34d1fb569978289ff9c9c",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "Downloading: 100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_1dd5034572eb4db69c70fa811bce825d"
}
},
"5e4cd7d00dea4e6b919cd20c27c0ece8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_f39a52aebf774913b7035578a02d5fa6",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 662,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 662,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_1dfc42c61cfc443b8fc5ab5f4a79f32f"
}
},
"3543976f58f145ab9cc694cd0b73e1a0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_7c79cd29e2f74949b84aa9fad181ecb5",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 662/662 [00:00<00:00, 11.9kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_7d743bee83f0477ba08fd8ca44a9d817"
}
},
"388e2e6ad1d34d1fb569978289ff9c9c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"1dd5034572eb4db69c70fa811bce825d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"f39a52aebf774913b7035578a02d5fa6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"1dfc42c61cfc443b8fc5ab5f4a79f32f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"7c79cd29e2f74949b84aa9fad181ecb5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"7d743bee83f0477ba08fd8ca44a9d817": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"b680e54583d84a86832eb605285f7a56": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_e7970061b05a4d918576c2c967ddd046",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_21934c67035f4a90bf9ff7c05d7dac2e",
"IPY_MODEL_6578ccecf1c848cab59935e8bc073289",
"IPY_MODEL_8ed2ec969afd471bb7ae86a743b5c090"
]
}
},
"e7970061b05a4d918576c2c967ddd046": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"21934c67035f4a90bf9ff7c05d7dac2e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_9dfa90b6728b42bd8f8dc879c197e5c6",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "Downloading: 100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_77e664b055db42e59448d4f050ba4cd2"
}
},
"6578ccecf1c848cab59935e8bc073289": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_9759a2a0cc874609b04880e666373f9e",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 253349,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 253349,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_bbda5c72dafc4f8d9934dba013a5daf7"
}
},
"8ed2ec969afd471bb7ae86a743b5c090": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_8ee71a601c2343cbac50f666b53f6737",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 247k/247k [00:00<00:00, 652kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_ab61bead8b8f4df894d124fb15c9761c"
}
},
"9dfa90b6728b42bd8f8dc879c197e5c6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"77e664b055db42e59448d4f050ba4cd2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"9759a2a0cc874609b04880e666373f9e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"bbda5c72dafc4f8d9934dba013a5daf7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"8ee71a601c2343cbac50f666b53f6737": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"ab61bead8b8f4df894d124fb15c9761c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"ff02249764fd486aa76b85fd0b897fa2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_d2593ed063b04c8393540238099106fb",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_62b5aa68bd5843fdb86766f69579906a",
"IPY_MODEL_8879dc4f5ac04276add412cb45e33778",
"IPY_MODEL_d420cb55bd5a431cbca36ba13f58646c"
]
}
},
"d2593ed063b04c8393540238099106fb": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"62b5aa68bd5843fdb86766f69579906a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_2b660317227d4fef83fa0f3454531f46",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "Downloading: 100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_9d0aa21709794fd9abb431cecbd47409"
}
},
"8879dc4f5ac04276add412cb45e33778": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_e58c3a5a125a47c6b1008236907e7657",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 2329735129,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 2329735129,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_837e25dcfa094941aef01f735cf03683"
}
},
"d420cb55bd5a431cbca36ba13f58646c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_21ccd08a71064febaaf7fdd369cf73a8",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 2.17G/2.17G [00:49<00:00, 47.7MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_b4ba4629fdaa45f08228f89becdc8257"
}
},
"2b660317227d4fef83fa0f3454531f46": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"9d0aa21709794fd9abb431cecbd47409": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"e58c3a5a125a47c6b1008236907e7657": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"837e25dcfa094941aef01f735cf03683": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"21ccd08a71064febaaf7fdd369cf73a8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"b4ba4629fdaa45f08228f89becdc8257": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"8be7375611d04b4dab1925150addffc4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_6a20ac71356840cd99028f437b5cb387",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_5da72ccbfa8a4a8a8f1fa9b6abcb7af5",
"IPY_MODEL_40c503e339dc4960981e5d1591bb2619",
"IPY_MODEL_53279568c10f4af6808219621022dbe8"
]
}
},
"6a20ac71356840cd99028f437b5cb387": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"5da72ccbfa8a4a8a8f1fa9b6abcb7af5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_b65b1c4b41534a908176baef5133f37a",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_8ffa38100a6c44b6ab2b372586cf2029"
}
},
"40c503e339dc4960981e5d1591bb2619": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_875a85d050754cc98e382fd7e07c48ef",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 1000000,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 1000000,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_e76a0ef044ab49eaae844a14a482f909"
}
},
"53279568c10f4af6808219621022dbe8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_8e93af770b1b4895aa6ff7eb97a50875",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 1000000/1000000 [04:27<00:00, 3743.65it/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_86128334b1fe4fd58507e2c9a8eeb525"
}
},
"b65b1c4b41534a908176baef5133f37a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"8ffa38100a6c44b6ab2b372586cf2029": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"875a85d050754cc98e382fd7e07c48ef": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"e76a0ef044ab49eaae844a14a482f909": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"8e93af770b1b4895aa6ff7eb97a50875": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"86128334b1fe4fd58507e2c9a8eeb525": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"a084a3da9ccd458b9939dc8c90ecee01": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_413d3dfc417d44f088972d388e28d039",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_381356f5f1f54eaea7383c82aac12e87",
"IPY_MODEL_af518f9b4405463781dd3be1c4a33c40",
"IPY_MODEL_0ba2bba408fe4efa86380a1fd215429e"
]
}
},
"413d3dfc417d44f088972d388e28d039": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"381356f5f1f54eaea7383c82aac12e87": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_b65ce4fef67f41b6bed67d15b92b38bc",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_b8681aba32c44c70aa07c967a89311d8"
}
},
"af518f9b4405463781dd3be1c4a33c40": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_f779fe22ed5946e896427923fe8b2f7a",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 1000000,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 1000000,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_948a35328c3247b4be0b81504c0a5a2b"
}
},
"0ba2bba408fe4efa86380a1fd215429e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_5d76be2426c14c0da8fcf7ce072281db",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 1000000/1000000 [04:37<00:00, 3746.29it/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_7da188e06e5846cbb98a29c9c6ebf240"
}
},
"b65ce4fef67f41b6bed67d15b92b38bc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"b8681aba32c44c70aa07c967a89311d8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"f779fe22ed5946e896427923fe8b2f7a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"948a35328c3247b4be0b81504c0a5a2b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"5d76be2426c14c0da8fcf7ce072281db": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"7da188e06e5846cbb98a29c9c6ebf240": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"9ccd8c47002b43249f0e07459a5886f9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_d00bf7d8a3d0488eb344244249df0fe2",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_fc5e059d553b4a72a53223ef235f1973",
"IPY_MODEL_6dd473af4dad45ea86641442e27174da",
"IPY_MODEL_ea4a96de78da4b939a2864125d5d90fd"
]
}
},
"d00bf7d8a3d0488eb344244249df0fe2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"fc5e059d553b4a72a53223ef235f1973": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_5258b3f8b92f4f9980f37c36d5d81950",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_5667c1b0fd464a48a936177c661ae657"
}
},
"6dd473af4dad45ea86641442e27174da": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_4012da0c84f74026bae1e204e1480260",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 26677,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 26677,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_34c2f6508dcc4b9789807425f88d9627"
}
},
"ea4a96de78da4b939a2864125d5d90fd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_99ec014a0d894b1988f86cbbf8e5887d",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 26677/26677 [00:02<00:00, 9740.68it/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_2f313463742f4784a6e2783bb7acec55"
}
},
"5258b3f8b92f4f9980f37c36d5d81950": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"5667c1b0fd464a48a936177c661ae657": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"4012da0c84f74026bae1e204e1480260": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"34c2f6508dcc4b9789807425f88d9627": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"99ec014a0d894b1988f86cbbf8e5887d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"2f313463742f4784a6e2783bb7acec55": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"9afa3fd99f634eca8ba8980fb950b9af": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_5742a9b2a6ea4519bef24cef9afcbc1a",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_36c1ac8da0f7462db9520d6d23eddae1",
"IPY_MODEL_04cd8227f7444ca6b6b8239d52bbe59e",
"IPY_MODEL_caaeb10b148e4777aba09bcc8befbc3c"
]
}
},
"5742a9b2a6ea4519bef24cef9afcbc1a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"36c1ac8da0f7462db9520d6d23eddae1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_19acc50e25684b9481e6f7157239b35b",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_73a2f37bdf534d2bb2f241f8e688fd7e"
}
},
"04cd8227f7444ca6b6b8239d52bbe59e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_56e9f9034fea48ebb267e458b2968d8b",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 226510,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 226510,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_212f300fd08c4b9992f382474f72d31a"
}
},
"caaeb10b148e4777aba09bcc8befbc3c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_ec774f016437475cba927a28515ce44f",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 226510/226510 [00:45<00:00, 21594.34it/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_5365e04683f34410aadff091c2da6c96"
}
},
"19acc50e25684b9481e6f7157239b35b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"73a2f37bdf534d2bb2f241f8e688fd7e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"56e9f9034fea48ebb267e458b2968d8b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"212f300fd08c4b9992f382474f72d31a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"ec774f016437475cba927a28515ce44f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"5365e04683f34410aadff091c2da6c96": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/emillykkejensen/8bf1b323495efc7252dee966e6bc1b5c/notebook.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "oh2xfITDhN2u"
},
"source": [
"Denne notebook er rent copy-paste fra https://gist.github.com/avidale/44cd35bfcdaf8bedf51d97c468cc8001 - lavet af David Dale. Før du kaster dig over den, så læs lige den artikkel han har skrevet på towardsdatascience: https://towardsdatascience.com/how-to-adapt-a-multilingual-t5-model-for-a-single-language-b9f94f3d9c90\n",
"\n",
"Pointen med det her skriv er, at reducere mT5 modellen, så den kun indeholder danske embedings og dermed gerne skulle reducere modellens størrelse ret markant!"
]
},
{
"cell_type": "code",
"metadata": {
"id": "BoiF06nfGvtW",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a6be3f19-0e00-4489-ad6e-8d657df0656c"
},
"source": [
"!pip install transformers sentencepiece"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting transformers\n",
" Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)\n",
"\u001b[K |████████████████████████████████| 3.4 MB 13.3 MB/s \n",
"\u001b[?25hCollecting sentencepiece\n",
" Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
"\u001b[K |████████████████████████████████| 1.2 MB 47.6 MB/s \n",
"\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.8.2)\n",
"Collecting sacremoses\n",
" Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)\n",
"\u001b[K |████████████████████████████████| 895 kB 37.0 MB/s \n",
"\u001b[?25hRequirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.0)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n",
"Collecting pyyaml>=5.1\n",
" Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n",
"\u001b[K |████████████████████████████████| 596 kB 46.4 MB/s \n",
"\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n",
"Collecting huggingface-hub<1.0,>=0.1.0\n",
" Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)\n",
"\u001b[K |████████████████████████████████| 61 kB 246 kB/s \n",
"\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
"Collecting tokenizers<0.11,>=0.10.1\n",
" Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n",
"\u001b[K |████████████████████████████████| 3.3 MB 23.5 MB/s \n",
"\u001b[?25hRequirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.6)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.6.0)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
"Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers, sentencepiece\n",
" Attempting uninstall: pyyaml\n",
" Found existing installation: PyYAML 3.13\n",
" Uninstalling PyYAML-3.13:\n",
" Successfully uninstalled PyYAML-3.13\n",
"Successfully installed huggingface-hub-0.2.1 pyyaml-6.0 sacremoses-0.0.46 sentencepiece-0.1.96 tokenizers-0.10.3 transformers-4.15.0\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "zcVexj3Ye6X3"
},
"source": [
"# Hvordan ser det ud"
]
},
{
"cell_type": "code",
"metadata": {
"id": "X99M7UWoHC9k"
},
"source": [
"from transformers import MT5ForConditionalGeneration, MT5Tokenizer\n",
"import torch\n",
"\n",
"model_name = 'google/mt5-base'\n",
"output_name = 'emillykkejensen/daT5-base'"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 162,
"referenced_widgets": [
"7cb4b97fba8d401f99b0e99cc984a7d7",
"7aa056e172684bf9a348740630964c91",
"622277c193f44cd196228edf9e00d87d",
"41ee529a0659492aafc349b1dd17abac",
"470513443ed4419b9ee002ef7cdea909",
"41d8060df9824ec1ab8db61963f58048",
"70f57885f83e46d794627d30b5302e2c",
"34cf4b8d5d1b4cd998f1a276b21076ec",
"1e6bc4df33904c8b83ebd038b84a80de",
"77048a32d8474fef9ec4e759bf0380fb",
"d3ae8eaf9c474504a3b6fc90cfe47768",
"ca64ec309903401e8cb5379bcffd24e5",
"4516aa06937c490f9070c6fcd41ba532",
"72c873142fcc4738951c472c84dc8705",
"c83ec79e3a314c6a97f96b29208e3343",
"ff924bf1d8894f2086323c6d854e3473",
"6c98b754333f4d71b7fbd0bfc90426fe",
"7da98f15686f4f35b86c80e6ea46f84b",
"6f572a00d58c43858a54fef8fe202fa5",
"f64c01800610460e94b4069b0e5a4fda",
"6d9551b36d60465ca16ac2386de0c9bb",
"db644517049642218033513112fc5e41",
"b13c991949c64469bc1b144c59fa76e2",
"6cab840322e043ba8bdeb271aaa2d451",
"e4c5fedab2e6402a947cb1df9a7d3f9e",
"7348975b164b4e4f8c898e678554d1c2",
"520ff29c681f4c2dafa8973a373f7094",
"068be5b78bca41a887ceb4fe58710ea0",
"d3ce481f18ee4c0eb73e6b17c9408e87",
"23ad3b4277da42faa8ebced67e9beb94",
"f11fe8adb0384c41a0d2c40195a93b58",
"927feb7db9134e169cfc8942758df769",
"24b93f86236f4ddaa235f8eca35901ed",
"ce2144cace494f54b55edb43bdd2cccf",
"17c8dd3c11814dddbf6ce87e360b0a53",
"610c6445a3f04c22a57db851238a4c0d",
"8a5264644b2546f7941b04196c1375d1",
"30b61fd6388d4bb4a03de5b0278b19a0",
"a3d59abacb7146e68178197909faffcd",
"9a5bf41d78324707893a41a340090e2b",
"16aff1f56bd04711bbe12c71a8281d68",
"c69671882bce4c5baf8cf2dcc94a0ee7",
"e4367ada853d43cbb285ac43fe2fc701",
"d69d708e0cc4461fa3d0a7ff62481721"
]
},
"id": "7OnBRq8pHFDN",
"outputId": "46eb89bf-c393-455f-9fa1-0cc022c8124f"
},
"source": [
"tokenizer = MT5Tokenizer.from_pretrained(model_name)\n",
"len(tokenizer.get_vocab())\n",
"\n",
"# Her skal den ikke hentes som fast, da det skaber problemer senere.."
],
"execution_count": 3,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7cb4b97fba8d401f99b0e99cc984a7d7",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"Downloading: 0%| | 0.00/4.11M [00:00<?, ?B/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ca64ec309903401e8cb5379bcffd24e5",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"Downloading: 0%| | 0.00/65.0 [00:00<?, ?B/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b13c991949c64469bc1b144c59fa76e2",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"Downloading: 0%| | 0.00/376 [00:00<?, ?B/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ce2144cace494f54b55edb43bdd2cccf",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"Downloading: 0%| | 0.00/702 [00:00<?, ?B/s]"
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"250100"
]
},
"metadata": {},
"execution_count": 3
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YMItls1shI3-"
},
"source": [
"Den originale m5t token indeholder 250K tokens + 100 t5 tokens "
]
},
{
"cell_type": "code",
"source": [
"from transformers import AutoTokenizer\n",
"tokenizerDA = AutoTokenizer.from_pretrained('Maltehb/danish-bert-botxo')\n",
"tokenizerDA"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 150,
"referenced_widgets": [
"14140e544f534306b04849facd847b52",
"3baa2f6358444a7f9c92f27be194ad0e",
"e9c417c5d3d94d6da0c6f2dd3a77189e",
"e8fc71e7b5fb4d4e9112592390d9eaf8",
"ca772b9b99e64b6480947a555ec3a58d",
"2602526af0524504a6a54f1eb4919b43",
"3f751fb6e0254acfb1a362298b786cbf",
"3e888848689e41919ebe311d02e88820",
"c8768c226c0743d4aff02a6f1f2828da",
"4d11243ebd2945568745731e2e04005e",
"eb695f5ae02849d2bb079ffeb575c321",
"fef663dae07942ed85e69e6aa98b8e72",
"5cc40837012244f88d562cc6141ae9da",
"58d8f4689e6242599d19424d2c3616e5",
"5e4cd7d00dea4e6b919cd20c27c0ece8",
"3543976f58f145ab9cc694cd0b73e1a0",
"388e2e6ad1d34d1fb569978289ff9c9c",
"1dd5034572eb4db69c70fa811bce825d",
"f39a52aebf774913b7035578a02d5fa6",
"1dfc42c61cfc443b8fc5ab5f4a79f32f",
"7c79cd29e2f74949b84aa9fad181ecb5",
"7d743bee83f0477ba08fd8ca44a9d817",
"b680e54583d84a86832eb605285f7a56",
"e7970061b05a4d918576c2c967ddd046",
"21934c67035f4a90bf9ff7c05d7dac2e",
"6578ccecf1c848cab59935e8bc073289",
"8ed2ec969afd471bb7ae86a743b5c090",
"9dfa90b6728b42bd8f8dc879c197e5c6",
"77e664b055db42e59448d4f050ba4cd2",
"9759a2a0cc874609b04880e666373f9e",
"bbda5c72dafc4f8d9934dba013a5daf7",
"8ee71a601c2343cbac50f666b53f6737",
"ab61bead8b8f4df894d124fb15c9761c"
]
},
"id": "gMFY3inaUekw",
"outputId": "5282e09f-e8f3-4899-9925-d135c4b40a40"
},
"execution_count": 4,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "14140e544f534306b04849facd847b52",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"Downloading: 0%| | 0.00/72.0 [00:00<?, ?B/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fef663dae07942ed85e69e6aa98b8e72",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"Downloading: 0%| | 0.00/662 [00:00<?, ?B/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b680e54583d84a86832eb605285f7a56",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"Downloading: 0%| | 0.00/247k [00:00<?, ?B/s]"
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"PreTrainedTokenizerFast(name_or_path='Maltehb/danish-bert-botxo', vocab_size=31748, model_max_len=512, is_fast=True, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})"
]
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "markdown",
"source": [
"Hvis vi henter Malte Højmark-Bertelsens dansk trænet BERT models tokens, så indeholder den \"kun\" 31748 tokens."
],
"metadata": {
"id": "3l_dmDVPUwdN"
}
},
{
"cell_type": "code",
"metadata": {
"id": "HkXHkM6OHJcH",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 240,
"referenced_widgets": [
"ff02249764fd486aa76b85fd0b897fa2",
"d2593ed063b04c8393540238099106fb",
"62b5aa68bd5843fdb86766f69579906a",
"8879dc4f5ac04276add412cb45e33778",
"d420cb55bd5a431cbca36ba13f58646c",
"2b660317227d4fef83fa0f3454531f46",
"9d0aa21709794fd9abb431cecbd47409",
"e58c3a5a125a47c6b1008236907e7657",
"837e25dcfa094941aef01f735cf03683",
"21ccd08a71064febaaf7fdd369cf73a8",
"b4ba4629fdaa45f08228f89becdc8257"
]
},
"outputId": "22a79855-57f0-480a-8534-bb9381f98a83"
},
"source": [
"model = MT5ForConditionalGeneration.from_pretrained(model_name)\n",
"\n",
"def msize(m):\n",
" return sum(p.numel() for p in m.parameters())\n",
"\n",
"original_size = msize(model)\n",
"print(msize(model))\n",
"print(msize(model.shared))\n",
"print('\\nEncoder:')\n",
"print(msize(model.encoder))\n",
"print(msize(model.encoder.block))\n",
"print('\\nDecoder:')\n",
"print(msize(model.decoder))\n",
"print(msize(model.decoder.block))\n",
"print(msize(model.lm_head))"
],
"execution_count": 5,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ff02249764fd486aa76b85fd0b897fa2",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"Downloading: 0%| | 0.00/2.17G [00:00<?, ?B/s]"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"582401280\n",
"192086016\n",
"\n",
"Encoder:\n",
"277040256\n",
"84953472\n",
"\n",
"Decoder:\n",
"305361024\n",
"113274240\n",
"192086016\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Den originale model indeholder 582k embeddings og fylder hele 2,1 Gb"
],
"metadata": {
"id": "crFF58H4Td9_"
}
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hmvmyYsyHh2s",
"outputId": "dcffce97-140b-4db7-e9b5-6bf69245751e"
},
"source": [
"print(msize(model.shared) / msize(model))\n",
"print(msize(model.lm_head) / msize(model))"
],
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0.32981729710484153\n",
"0.32981729710484153\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "18ckhebWLLra"
},
"source": [
"Ud af dem, udgør Input og output embeddings 66%"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "amFXHV9OL9SU"
},
"source": [
"# Kig på tokens"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NfeGCTv5Vvmu"
},
"source": [
"Hent tekst corpus fra https://wortschatz.uni-leipzig.de/en/download/Danish på dansk og engelsk. I modsætning til Davids originale script, så henter jeg nyheder og ikke web-public - primært for at få et 'renere' sprog."
]
},
{
"cell_type": "code",
"metadata": {
"id": "WxsNhpKfME5W",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "95a14355-bb96-426c-baeb-7e97965d9afe"
},
"source": [
"!wget http://pcai056.informatik.uni-leipzig.de/downloads/corpora/dan_news_2020_1M.tar.gz\n",
"!tar -xsvf dan_news_2020_1M.tar.gz"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2022-01-06 10:03:19-- http://pcai056.informatik.uni-leipzig.de/downloads/corpora/dan_news_2020_1M.tar.gz\n",
"Resolving pcai056.informatik.uni-leipzig.de (pcai056.informatik.uni-leipzig.de)... 139.18.2.216\n",
"Connecting to pcai056.informatik.uni-leipzig.de (pcai056.informatik.uni-leipzig.de)|139.18.2.216|:80... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 210729442 (201M) [application/x-gzip]\n",
"Saving to: ‘dan_news_2020_1M.tar.gz’\n",
"\n",
"dan_news_2020_1M.ta 100%[===================>] 200.97M 73.6MB/s in 2.7s \n",
"\n",
"2022-01-06 10:03:21 (73.6 MB/s) - ‘dan_news_2020_1M.tar.gz’ saved [210729442/210729442]\n",
"\n",
"dan_news_2020_1M/\n",
"dan_news_2020_1M/dan_news_2020_1M-co_s.txt\n",
"dan_news_2020_1M/dan_news_2020_1M-inv_w.txt\n",
"dan_news_2020_1M/dan_news_2020_1M-words.txt\n",
"dan_news_2020_1M/dan_news_2020_1M-inv_so.txt\n",
"dan_news_2020_1M/dan_news_2020_1M-import.sql\n",
"dan_news_2020_1M/dan_news_2020_1M-sentences.txt\n",
"dan_news_2020_1M/dan_news_2020_1M-co_n.txt\n",
"dan_news_2020_1M/dan_news_2020_1M-sources.txt\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "XNHwPMCHiRhr",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7e0c3fcc-2d79-4143-94b8-e35de23f0c81"
},
"source": [
"!wget http://pcai056.informatik.uni-leipzig.de/downloads/corpora/eng_news_2020_1M.tar.gz\n",
"!tar -xsvf eng_news_2020_1M.tar.gz"
],
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2022-01-06 10:03:27-- http://pcai056.informatik.uni-leipzig.de/downloads/corpora/eng_news_2020_1M.tar.gz\n",
"Resolving pcai056.informatik.uni-leipzig.de (pcai056.informatik.uni-leipzig.de)... 139.18.2.216\n",
"Connecting to pcai056.informatik.uni-leipzig.de (pcai056.informatik.uni-leipzig.de)|139.18.2.216|:80... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 276283393 (263M) [application/x-gzip]\n",
"Saving to: ‘eng_news_2020_1M.tar.gz’\n",
"\n",
"eng_news_2020_1M.ta 100%[===================>] 263.48M 100MB/s in 2.6s \n",
"\n",
"2022-01-06 10:03:30 (100 MB/s) - ‘eng_news_2020_1M.tar.gz’ saved [276283393/276283393]\n",
"\n",
"eng_news_2020_1M/\n",
"eng_news_2020_1M/eng_news_2020_1M-import.sql\n",
"eng_news_2020_1M/eng_news_2020_1M-words.txt\n",
"eng_news_2020_1M/eng_news_2020_1M-inv_w.txt\n",
"eng_news_2020_1M/eng_news_2020_1M-sentences.txt\n",
"eng_news_2020_1M/eng_news_2020_1M-sources.txt\n",
"eng_news_2020_1M/eng_news_2020_1M-inv_so.txt\n",
"eng_news_2020_1M/eng_news_2020_1M-co_s.txt\n",
"eng_news_2020_1M/eng_news_2020_1M-co_n.txt\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "gqjTHFJIiZTk"
},
"source": [
"Load corpus ind i pandas og tag et lille kig på data"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 441
},
"id": "IoJlXMw_M7pT",
"outputId": "670eb1ee-d578-4c4a-ab98-065486b78c39"
},
"source": [
"import pandas as pd\n",
"import csv\n",
"\n",
"# Brug Googles Colabs data_table til at vise tabellen interaktivt\n",
"from google.colab import data_table\n",
"data_table.enable_dataframe_formatter()\n",
"\n",
"fname = 'dan_news_2020_1M/dan_news_2020_1M-sentences.txt'\n",
"df_da = pd.read_csv(fname, sep='\\t', header=None, quoting=csv.QUOTE_NONE)\n",
"df_da.columns = ['idx', 'text']\n",
"df_da"
],
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Warning: total number of rows (1000000) exceeds max_rows (20000). Falling back to pandas display.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-f5464d68-01e4-49e6-bbad-dec6db913ad6\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>idx</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>’0’ bliver defineret som »Jeg gør aldrig noget...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>”10 gode grunde til at elske hvaler”</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>\"10 tak 2660\" går ud på at takke alt fra læger...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>»10- til 14-årige ved måske, at de har gjort n...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>»112 skal man kun ringe, når man er i akut far...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999995</th>\n",
" <td>999996</td>\n",
" <td>Zwicky har valgt ikke at inddrage de svar, jeg...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999996</th>\n",
" <td>999997</td>\n",
" <td>Zydeco er en tempofyldt musikform, som har tæt...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999997</th>\n",
" <td>999998</td>\n",
" <td>Zydeco er forholdsvis sjælden på disse breddeg...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999998</th>\n",
" <td>999999</td>\n",
" <td>ZZ Top Jam livestreamer torsdag den 23. april ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999999</th>\n",
" <td>1000000</td>\n",
" <td>ZZ Top Jam spiller efter borgmesterens tale.</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000000 rows × 2 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f5464d68-01e4-49e6-bbad-dec6db913ad6')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-f5464d68-01e4-49e6-bbad-dec6db913ad6 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-f5464d68-01e4-49e6-bbad-dec6db913ad6');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" idx text\n",
"0 1 ’0’ bliver defineret som »Jeg gør aldrig noget...\n",
"1 2 ”10 gode grunde til at elske hvaler”\n",
"2 3 \"10 tak 2660\" går ud på at takke alt fra læger...\n",
"3 4 »10- til 14-årige ved måske, at de har gjort n...\n",
"4 5 »112 skal man kun ringe, når man er i akut far...\n",
"... ... ...\n",
"999995 999996 Zwicky har valgt ikke at inddrage de svar, jeg...\n",
"999996 999997 Zydeco er en tempofyldt musikform, som har tæt...\n",
"999997 999998 Zydeco er forholdsvis sjælden på disse breddeg...\n",
"999998 999999 ZZ Top Jam livestreamer torsdag den 23. april ...\n",
"999999 1000000 ZZ Top Jam spiller efter borgmesterens tale.\n",
"\n",
"[1000000 rows x 2 columns]"
]
},
"metadata": {},
"execution_count": 9
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 441
},
"id": "V-Uc7nbziyXp",
"outputId": "0505102d-d838-44d5-fe9d-c80b37248b0e"
},
"source": [
"fname = 'eng_news_2020_1M/eng_news_2020_1M-sentences.txt'\n",
"df_en = pd.read_csv(fname, sep='\\t', header=None, quoting=csv.QUOTE_NONE)\n",
"df_en.columns = ['idx', 'text']\n",
"df_en"
],
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Warning: total number of rows (1000000) exceeds max_rows (20000). Falling back to pandas display.\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-29b851bc-0d32-4fe2-8b8b-29eec0b8811c\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>idx</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>!, 05/27/1994) — NHL Network, 2 p.m. &amp; 10 p.m.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>$0 spent on advertising sans the rare times we...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>€100,000 in cash was seized during the joint o...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>$100,000 to Hospice Santa Cruz County and $75,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>£1,000 bonus 'may not be enough to protect jobs'</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999995</th>\n",
" <td>999996</td>\n",
" <td>အောက်တွင် မြန်မာလို ဖတ်ရှုနိုင်ပါသည် Myanmar C...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999996</th>\n",
" <td>999997</td>\n",
" <td>一The former downtown Magpie Cafe space was tak...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999997</th>\n",
" <td>999998</td>\n",
" <td>免疫實踐諮詢委員會(Advisory Committee on Immunization P...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999998</th>\n",
" <td>999999</td>\n",
" <td>日本リサーチセンター Nippon Research Center, Ltd.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999999</th>\n",
" <td>1000000</td>\n",
" <td>食戟のソーマ4th Plate was awesome!</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000000 rows × 2 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-29b851bc-0d32-4fe2-8b8b-29eec0b8811c')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-29b851bc-0d32-4fe2-8b8b-29eec0b8811c button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-29b851bc-0d32-4fe2-8b8b-29eec0b8811c');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" idx text\n",
"0 1 !, 05/27/1994) — NHL Network, 2 p.m. & 10 p.m.\n",
"1 2 $0 spent on advertising sans the rare times we...\n",
"2 3 €100,000 in cash was seized during the joint o...\n",
"3 4 $100,000 to Hospice Santa Cruz County and $75,...\n",
"4 5 £1,000 bonus 'may not be enough to protect jobs'\n",
"... ... ...\n",
"999995 999996 အောက်တွင် မြန်မာလို ဖတ်ရှုနိုင်ပါသည် Myanmar C...\n",
"999996 999997 一The former downtown Magpie Cafe space was tak...\n",
"999997 999998 免疫實踐諮詢委員會(Advisory Committee on Immunization P...\n",
"999998 999999 日本リサーチセンター Nippon Research Center, Ltd.\n",
"999999 1000000 食戟のソーマ4th Plate was awesome!\n",
"\n",
"[1000000 rows x 2 columns]"
]
},
"metadata": {},
"execution_count": 10
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "zhkWqfdNjNww"
},
"source": [
"Encode hver sætning og tæl antallet af gange en token bliver brugt for hvert corpus (dansk og engelsk)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81,
"referenced_widgets": [
"8be7375611d04b4dab1925150addffc4",
"6a20ac71356840cd99028f437b5cb387",
"5da72ccbfa8a4a8a8f1fa9b6abcb7af5",
"40c503e339dc4960981e5d1591bb2619",
"53279568c10f4af6808219621022dbe8",
"b65b1c4b41534a908176baef5133f37a",
"8ffa38100a6c44b6ab2b372586cf2029",
"875a85d050754cc98e382fd7e07c48ef",
"e76a0ef044ab49eaae844a14a482f909",
"8e93af770b1b4895aa6ff7eb97a50875",
"86128334b1fe4fd58507e2c9a8eeb525",
"a084a3da9ccd458b9939dc8c90ecee01",
"413d3dfc417d44f088972d388e28d039",
"381356f5f1f54eaea7383c82aac12e87",
"af518f9b4405463781dd3be1c4a33c40",
"0ba2bba408fe4efa86380a1fd215429e",
"b65ce4fef67f41b6bed67d15b92b38bc",
"b8681aba32c44c70aa07c967a89311d8",
"f779fe22ed5946e896427923fe8b2f7a",
"948a35328c3247b4be0b81504c0a5a2b",
"5d76be2426c14c0da8fcf7ce072281db",
"7da188e06e5846cbb98a29c9c6ebf240"
]
},
"id": "lmzSON9iM_yb",
"outputId": "f5c675f6-ad6e-4953-834b-d4593c5a6d9d"
},
"source": [
"from collections import Counter\n",
"from tqdm.auto import tqdm, trange\n",
"\n",
"\n",
"def count_token_ids(texts):\n",
"    \"\"\"Return a Counter of the token ids produced by `tokenizer.encode` over all `texts`.\"\"\"\n",
"    counts = Counter()\n",
"    for sentence in tqdm(texts):\n",
"        counts.update(tokenizer.encode(sentence))\n",
"    return counts\n",
"\n",
"\n",
"cnt_da = count_token_ids(df_da.text)\n",
"cnt_en = count_token_ids(df_en.text)"
],
"execution_count": 11,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8be7375611d04b4dab1925150addffc4",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/1000000 [00:00<?, ?it/s]"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a084a3da9ccd458b9939dc8c90ecee01",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/1000000 [00:00<?, ?it/s]"
]
},
"metadata": {}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "sTzND5F1OkEY"
},
"source": [
"Find ud af, hvor mange forskellige tokens der bruges i hvert sprog:"
]
},
{
"cell_type": "code",
"metadata": {
"id": "M07fj3z0NWiy",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "3e871ffa-be2f-41c9-9baa-53acebb25a0f"
},
"source": [
"# Number of distinct token ids seen per language, and their share of the full vocabulary.\n",
"for label, counts in (('DA: ', cnt_da), ('EN: ', cnt_en)):\n",
"    print(label, len(counts), len(counts)/tokenizer.vocab_size)\n",
"\n",
"# Token ids that occur in both corpora, also as a share of the Danish ids.\n",
"common = len(cnt_da.keys() & cnt_en.keys())\n",
"print('Fælles: ', common, common / len(cnt_da))"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"DA: 51545 0.20609756097560974\n",
"EN: 71744 0.2868612554978009\n",
"Fælles: 41396 0.8031040838102629\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2ULUmyllmNA0"
},
"source": [
"På dansk bruges 51k (21%) ud af de 250k tokens, mens det på engelsk er 71k (29%) - hvilket giver meget god mening, da engelsk indeholder flere ord end dansk, og da den originale mt5 model er trænet på væsentligt mere engelsk end dansk data.\n",
"\n",
"41k tokens bruges både på dansk og engelsk - hvilket betyder at 80% af alle danske tokens også bruges på engelsk."
]
},
{
"cell_type": "markdown",
"source": [
"Nedenfor tjekker vi, hvor stor en andel de 10k, 20k og 30k mest brugte tokens udgør af den samlede brug"
],
"metadata": {
"id": "fO9tw6niBtwq"
}
},
{
"cell_type": "code",
"metadata": {
"id": "kNudkAe5NbKT",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "291b2811-a8b6-439c-9d28-17a804eacf8e"
},
"source": [
"# Share of all token occurrences covered by the N most frequent token ids, per language.\n",
"for heading, counts in (('Dansk:', cnt_da), ('\\nEngelsk:', cnt_en)):\n",
"    print(heading)\n",
"    total_uses = sum(counts.values())\n",
"    for top in 10_000, 20_000, 30_000:\n",
"        covered = sum(uses for _, uses in counts.most_common(top))\n",
"        print('Top:', top, covered / total_uses)"
],
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Dansk:\n",
"Top: 10000 0.982576739076921\n",
"Top: 20000 0.9956902186266333\n",
"Top: 30000 0.9986029349118826\n",
"\n",
"Engelsk:\n",
"Top: 10000 0.9515249161343127\n",
"Top: 20000 0.9823923151934567\n",
"Top: 30000 0.9925520177286887\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Vi kan se, at de 10.000 mest brugte tokens dækker 98% af al tokenbrug på dansk. På engelsk er det 95%. Ser vi på top 20k, er næsten al tokenbrug på dansk dækket (99,6%)."
],
"metadata": {
"id": "xS0_kPy8B9wZ"
}
},
{
"cell_type": "markdown",
"source": [
"I forsøget på også at medtage de danske BERT-tokens fjerner jeg det wordpiece-specifikke '##' og encoder herefter hvert BERT-token til mt5 sentencepiece-tokens.\n",
"Jeg er ikke 100% sikker på, at det her er den smarteste måde at gøre det på, så råb op, hvis du har en bedre idé!"
],
"metadata": {
"id": "agMqjClv1Mth"
}
},
{
"cell_type": "code",
"source": [
"import re\n",
"\n",
"tokenizerDA_vocab = tokenizerDA.vocab\n",
"\n",
"# Strip the leading WordPiece marker '##' from every BERT vocabulary entry.\n",
"wordpiece_prefix = re.compile('^##')\n",
"new_tokenizerDA_vocab = {\n",
"    wordpiece_prefix.sub('', piece): bert_id\n",
"    for piece, bert_id in tokenizerDA_vocab.items()\n",
"}\n",
"\n",
"# Encode each stripped BERT entry with the mT5 tokenizer and count the resulting token ids.\n",
"cnt_BERT_da = Counter()\n",
"for text in tqdm(new_tokenizerDA_vocab.keys()):\n",
"    cnt_BERT_da.update(tokenizer.encode(text))\n",
"\n",
"# Ids of the 20k most frequent tokens in the Danish news corpus.\n",
"cnt_da_top20k = [token_id for token_id, _ in cnt_da.most_common(20_000)]\n",
"\n",
"BERTda_tokenizer_token_ids = set(cnt_BERT_da)\n",
"tokenizer_da_token_ids = set(cnt_da_top20k)\n",
"\n",
"shared_tokens = tokenizer_da_token_ids & BERTda_tokenizer_token_ids\n",
"\n",
"print('Antal tokens fra dansk BERT:', len(cnt_BERT_da))\n",
"print('Antal tokens fra dansk BERT brugt i encoding af danske nyheder (top 20k):', len(shared_tokens))\n"
],
"metadata": {
"id": "A5jWg8wcdM8o",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 83,
"referenced_widgets": [
"9ccd8c47002b43249f0e07459a5886f9",
"d00bf7d8a3d0488eb344244249df0fe2",
"fc5e059d553b4a72a53223ef235f1973",
"6dd473af4dad45ea86641442e27174da",
"ea4a96de78da4b939a2864125d5d90fd",
"5258b3f8b92f4f9980f37c36d5d81950",
"5667c1b0fd464a48a936177c661ae657",
"4012da0c84f74026bae1e204e1480260",
"34c2f6508dcc4b9789807425f88d9627",
"99ec014a0d894b1988f86cbbf8e5887d",
"2f313463742f4784a6e2783bb7acec55"
]
},
"outputId": "a418e18d-2dc6-4759-d583-9a59e3a3f2da"
},
"execution_count": 14,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9ccd8c47002b43249f0e07459a5886f9",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/26677 [00:00<?, ?it/s]"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Antal tokens fra dansk BERT: 11955\n",
"Antal tokens fra dansk BERT brugt i encoding af danske nyheder (top 20k): 8969\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Med denne lille sammenligning kan vi se, at de 31748 token id's i den originale danske BERT tokenizer (26677 unikke tokens, når '##' er fjernet) bliver til 11955 forskellige token id's, når vi encoder dem med mt5-tokenizeren. Af dem findes 8969 også blandt de 20k mest brugte token id's i de danske nyheder."
],
"metadata": {
"id": "Zz2Ns2Rm-0cC"
}
},
{
"cell_type": "markdown",
"metadata": {
"id": "0N_D37J3lbqr"
},
"source": [
"Inden vi lukker og slukker for denne lille øvelse, kopieres det gamle vocab, da det skal bruges senere..."
]
},
{
"cell_type": "code",
"metadata": {
"id": "9RzGibfZQbgP"
},
"source": [
"# Keep the original piece -> id mapping and its inverse (id -> piece).\n",
"old_voc = tokenizer.get_vocab()\n",
"old_inv_voc = {token_id: piece for piece, token_id in old_voc.items()}"
],
"execution_count": 15,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Byg de 'nye' tokens"
],
"metadata": {
"id": "2DHw3wUFC4iU"
}
},
{
"cell_type": "markdown",
"metadata": {
"id": "AwwPWiO3Po1x"
},
"source": [
"Jeg sammensætter de 'nye' tokens (dvs. fjerner alle andre) på følgende måde:\n",
"\n",
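"* De første 1K token id's i det originale vocab (ligesom i den originale notebook)\n",
"* Alle tokens brugt til at encode de danske BERT-tokens\n",
"* Top 20K af de danske tokens (dog højst indtil der i alt er 29.900 tokens)\n",
"* De sidste 100 token id's i det originale vocab (de særlige (m)T5-tokens)\n"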
]
},
{
"cell_type": "code",
"metadata": {
"id": "J-aSMIB1Pxvh",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "defeab33-2421-4811-f31f-64cce26f06f2"
},
"source": [
"# The first 1000 ids of the original vocabulary are always kept.\n",
"new_tokens = set(range(1000))\n",
"print(len(new_tokens))\n",
"\n",
"# Every mT5 token id that was needed to encode the Danish BERT vocabulary.\n",
"new_tokens.update(cnt_BERT_da.keys())\n",
"print(len(new_tokens))\n",
"\n",
"# Fill up with the most frequent Danish token ids, leaving room for the 100 ids added below.\n",
"max_before_last_100 = 29_900\n",
"for i, (k, v) in enumerate(cnt_da.most_common(20_000)):\n",
"    # '>=' instead of '==': the cap must also stop the loop when the set already starts out above it.\n",
"    if len(new_tokens) >= max_before_last_100:\n",
"        print(i, 'Danish tokens are included')\n",
"        break\n",
"    new_tokens.add(k)\n",
"print(len(new_tokens))\n",
"\n",
"# The last 100 ids of the original vocabulary.\n",
"new_tokens.update(range(tokenizer.vocab_size - 100, tokenizer.vocab_size))\n",
"\n",
"print(len(new_tokens))\n",
"# Sorted, so that position i in kept_ids is the new id of old id kept_ids[i].\n",
"kept_ids = sorted(new_tokens)"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1000\n",
"12574\n",
"23490\n",
"23590\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"I alt beholdes der 23.590 tokens"
],
"metadata": {
"id": "tg62Ir3MIA9D"
}
},
{
"cell_type": "code",
"metadata": {
"id": "q21bC7tpTyuW",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2a64e930-64a0-4aa0-ca8a-978c4c32d40b"
},
"source": [
"len(kept_ids) / tokenizer.vocab_size"
],
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.09432227109156338"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BLAFLhrDoD4U"
},
"source": [
"Det nye vocab indeholder kun 9% af det originale - hvilket giver mening, eftersom dansk er et lille sprog i en multilingual model."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IaaCyAPlomLt"
},
"source": [
"### Update model embeddings"
]
},
{
"cell_type": "code",
"metadata": {
"id": "k-BNn3R6R0lY"
},
"source": [
"import torch\n",
"\n",
"new_size = len(kept_ids)\n",
"new_emb = torch.nn.Embedding(new_size, model.shared.embedding_dim)\n",
"new_head = torch.nn.Linear(in_features=model.lm_head.in_features, out_features=new_size, bias=False)\n",
"\n",
"# Row i of the new matrices is row kept_ids[i] of the old ones; copy all rows with one indexed read\n",
"# instead of a Python loop over every kept id.\n",
"kept_index = torch.tensor(kept_ids, dtype=torch.long)\n",
"with torch.no_grad():\n",
"    new_emb.weight.copy_(model.shared.weight[kept_index])\n",
"    new_head.weight.copy_(model.lm_head.weight[kept_index])\n",
"\n",
"model.shared.weight = new_emb.weight\n",
"model.lm_head.weight = new_head.weight\n",
"\n",
"# Swapping the weight does not update the modules' size attributes; keep them in line with the new shape.\n",
"model.shared.num_embeddings = new_size\n",
"model.lm_head.out_features = new_size"
],
"execution_count": 18,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "g_aPBQ20kvCB",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "753f0987-ebc8-4824-a01c-73c1e51a07ac"
},
"source": [
"# Parameter counts of the original model, the pruned model and its main parts.\n",
"print('Gammel model:')\n",
"print(original_size)\n",
"\n",
"sections = [\n",
"    ('\\nNy model:', [model, model.shared]),\n",
"    ('\\nEncoder:', [model.encoder, model.encoder.block]),\n",
"    ('\\nDecoder:', [model.decoder, model.decoder.block, model.lm_head]),\n",
"]\n",
"for heading, modules in sections:\n",
"    print(heading)\n",
"    for module in modules:\n",
"        print(msize(module))\n",
"\n",
"print('\\nSammenligning med gammel model:')\n",
"pruned_size = msize(model)\n",
"print(pruned_size, pruned_size / original_size)"
],
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Gammel model:\n",
"582401280\n",
"\n",
"Ny model:\n",
"234463488\n",
"18117120\n",
"\n",
"Encoder:\n",
"103071360\n",
"84953472\n",
"\n",
"Decoder:\n",
"131392128\n",
"113274240\n",
"18117120\n",
"\n",
"Sammenligning med gammel model:\n",
"234463488 0.40258065366889306\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QcIDtmymo56s"
},
"source": [
"Den nye model har 'kun' 234M parametre - 40% af den originale størrelse (der var på 582M). "
]
},
{
"cell_type": "code",
"source": [
"# Share of all model parameters held by the input embeddings and by the output head.\n",
"for part in (model.shared, model.lm_head):\n",
"    print(msize(part) / msize(model))"
],
"metadata": {
"id": "JJySfhScMNfh",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "19defc3e-56fe-480e-c98a-acda4249d650"
},
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0.07727053860087589\n",
"0.07727053860087589\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Input og output embeddings udgør nu kun ca. 15% af modellen (mod 66% før)"
],
"metadata": {
"id": "BZIdz0IiMfkV"
}
},
{
"cell_type": "markdown",
"metadata": {
"id": "vdKmFJY_k7xZ"
},
"source": [
"### Update the tokenizer"
]
},
{
"cell_type": "markdown",
"source": [
"Her sker der præcis det samme som i den org. notebook"
],
"metadata": {
"id": "Vi9Q_m3BNHTM"
}
},
{
"cell_type": "markdown",
"metadata": {
"id": "-X25sG0jmc83"
},
"source": [
"T5 uses the Sentencepiece tokenizer, which is implemented in C++ and is opaque to Python. \n",
"\n",
"Fortunately, we can download its model and deploy it into Python using its Protobuf representation. \n",
"\n",
"https://github.com/google/sentencepiece/issues/121"
]
},
{
"cell_type": "code",
"metadata": {
"id": "OpII_eX3mY80",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1d927c52-cf5f-45cf-891b-00789ce11eec"
},
"source": [
"# -O pins the output filename: without it, re-running this cell saves 'sentencepiece_model.proto.1'\n",
"# and the already existing file keeps being used.\n",
"!wget -O sentencepiece_model.proto https://raw.githubusercontent.com/google/sentencepiece/master/src/sentencepiece_model.proto"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2022-01-06 10:12:52-- https://raw.githubusercontent.com/google/sentencepiece/master/src/sentencepiece_model.proto\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 12872 (13K) [text/plain]\n",
"Saving to: ‘sentencepiece_model.proto’\n",
"\n",
"\r sentencep 0%[ ] 0 --.-KB/s \rsentencepiece_model 100%[===================>] 12.57K --.-KB/s in 0s \n",
"\n",
"2022-01-06 10:12:52 (75.2 MB/s) - ‘sentencepiece_model.proto’ saved [12872/12872]\n",
"\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SGb1DiYmpnkr"
},
"source": [
"We compile the protobuf description of the sentencepiece model in order to be able to modify it. "
]
},
{
"cell_type": "code",
"metadata": {
"id": "I6B0MA5DmaZM"
},
"source": [
"!protoc --python_out=. sentencepiece_model.proto"
],
"execution_count": 22,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "nJwHRRzbngJY"
},
"source": [
"Now we can serialize the model used by the current tokenizer and open it as a protobuf class. "
]
},
{
"cell_type": "code",
"metadata": {
"id": "MdQM0L3lnybA",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 101,
"referenced_widgets": [
"9afa3fd99f634eca8ba8980fb950b9af",
"5742a9b2a6ea4519bef24cef9afcbc1a",
"36c1ac8da0f7462db9520d6d23eddae1",
"04cd8227f7444ca6b6b8239d52bbe59e",
"caaeb10b148e4777aba09bcc8befbc3c",
"19acc50e25684b9481e6f7157239b35b",
"73a2f37bdf534d2bb2f241f8e688fd7e",
"56e9f9034fea48ebb267e458b2968d8b",
"212f300fd08c4b9992f382474f72d31a",
"ec774f016437475cba927a28515ce44f",
"5365e04683f34410aadff091c2da6c96"
]
},
"outputId": "8aeef595-2616-4c7f-a3d5-5d6efb6c95f7"
},
"source": [
"import sentencepiece_model_pb2 as spmp\n",
"\n",
"# Parse the tokenizer's serialized sentencepiece model into an editable protobuf message.\n",
"smp = tokenizer.sp_model.serialized_model_proto()\n",
"m = spmp.ModelProto()\n",
"m.ParseFromString(smp)\n",
"\n",
"print('the loaded model has pieces:', len(m.pieces))\n",
"# The in-place overwrite below reads piece kept_ids[i] after pieces 0..i-1 have been rewritten;\n",
"# that is only safe when kept_ids is strictly increasing (then kept_ids[i] >= i).\n",
"assert all(a < b for a, b in zip(kept_ids, kept_ids[1:])), 'kept_ids must be sorted and unique'\n",
"new_pieces = [m.pieces[idx] for idx in kept_ids]\n",
"print('the new pieces:', len(new_pieces))\n",
"\n",
"# overwrite the first len(new_pieces) slots with the kept pieces\n",
"for i, p in enumerate(new_pieces):\n",
"    m.pieces[i].piece = p.piece\n",
"    m.pieces[i].score = p.score\n",
"    m.pieces[i].type = p.type\n",
"\n",
"# drop the remaining pieces with one slice deletion instead of popping them one at a time\n",
"n = len(new_pieces)\n",
"del m.pieces[n:]\n",
"\n",
"print(len(m.pieces))\n",
"with open('new_sp.model', 'wb') as f:\n",
"    f.write(m.SerializeToString())"
],
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"the loaded model has pieces: 250100\n",
"the new pieces: 23590\n"
]
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9afa3fd99f634eca8ba8980fb950b9af",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/226510 [00:00<?, ?it/s]"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"23590\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "qWeP6N1sry93"
},
"source": [
"# Load the pruned sentencepiece model; extra_ids=0 means no additional sentinel ids are appended by the tokenizer class.\n",
"new_tokenizer = MT5Tokenizer(vocab_file='new_sp.model', extra_ids=0)"
],
"execution_count": 24,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "czfXG1IqsDT4"
},
"source": [
"### Gem model og tokenizer"
]
},
{
"cell_type": "markdown",
"source": [
"Rekonfigurerer model config:"
],
"metadata": {
"id": "TFXXRij7ZMhy"
}
},
{
"cell_type": "code",
"metadata": {
"id": "oanCNPiIsCdU",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1967fe0d-e40d-4a20-c3ba-5bbfb0a62c2e"
},
"source": [
"# Make the config describe the pruned vocabulary and carry the new model name.\n",
"model.config.vocab_size = new_size\n",
"model.config._name_or_path = output_name\n",
"model.config"
],
"execution_count": 25,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"MT5Config {\n",
" \"_name_or_path\": \"emillykkejensen/daT5-base\",\n",
" \"architectures\": [\n",
" \"MT5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 2048,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 768,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"mt5\",\n",
" \"num_decoder_layers\": 12,\n",
" \"num_heads\": 12,\n",
" \"num_layers\": 12,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"tokenizer_class\": \"T5Tokenizer\",\n",
" \"transformers_version\": \"4.15.0\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 23590\n",
"}"
]
},
"metadata": {},
"execution_count": 25
}
]
},
{
"cell_type": "markdown",
"source": [
"Gem den nye model og den nye tokenizer"
],
"metadata": {
"id": "ynH3IMvDZkMP"
}
},
{
"cell_type": "code",
"metadata": {
"id": "UaebisNqr4Mk"
},
"source": [
"# Write the tokenizer first, then the model, into the same output directory.\n",
"for artifact in (new_tokenizer, model):\n",
"    artifact.save_pretrained('new-model')"
],
"execution_count": 26,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "nIoB98_9r7VU",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d6f1f4ff-a409-41d4-cb49-0297a36ff936"
},
"source": [
"# List the saved files with their sizes.\n",
"!ls -alsh new-model"
],
"execution_count": 27,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"total 896M\n",
"4.0K drwxr-xr-x 2 root root 4.0K Jan 6 10:13 .\n",
"4.0K drwxr-xr-x 1 root root 4.0K Jan 6 10:13 ..\n",
"4.0K -rw-r--r-- 1 root root 711 Jan 6 10:13 config.json\n",
"895M -rw-r--r-- 1 root root 895M Jan 6 10:13 pytorch_model.bin\n",
"4.0K -rw-r--r-- 1 root root 65 Jan 6 10:13 special_tokens_map.json\n",
"636K -rw-r--r-- 1 root root 636K Jan 6 10:13 spiece.model\n",
"4.0K -rw-r--r-- 1 root root 173 Jan 6 10:13 tokenizer_config.json\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5gFLD5dUs7gZ"
},
"source": [
"Den nye model fylder en hel del mindre end den originale mt5 - kun 895 MB mod 2,17 GB!"
]
},
{
"cell_type": "code",
"source": [
"# Relative paths, matching the 'new-model' directory used when saving: no dependency on Colab's /content,\n",
"# and the archive unpacks to 'new-model/' instead of 'content/new-model/'.\n",
"!zip -r new-model.zip new-model\n"
],
"metadata": {
"id": "oe0Otwl5KgMu",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7d7ac73a-5afb-4e6f-e73d-8f968275f998"
},
"execution_count": 28,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" adding: content/new-model/ (stored 0%)\n",
" adding: content/new-model/special_tokens_map.json (deflated 34%)\n",
" adding: content/new-model/tokenizer_config.json (deflated 32%)\n",
" adding: content/new-model/config.json (deflated 45%)\n",
" adding: content/new-model/pytorch_model.bin (deflated 53%)\n",
" adding: content/new-model/spiece.model (deflated 51%)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Zip og gem, hvis det er det du er til :)"
],
"metadata": {
"id": "h-Sx_mHyKt5c"
}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment