Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save JonathanLoscalzo/94caa68519f6ef76b871e14f765abf84 to your computer and use it in GitHub Desktop.
Save JonathanLoscalzo/94caa68519f6ef76b871e14f765abf84 to your computer and use it in GitHub Desktop.
LLM_zoomcamp_RAG - Homework.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "V28",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "TPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"2cd8a64aa01f4af8be4ca4796bc36218": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_3e04b8a847f849aa8882a82328456fe9",
"IPY_MODEL_c6b0ee0554814433a2e9d017be0b4764",
"IPY_MODEL_342c742d0428448ab0915fa9acc0a8b3"
],
"layout": "IPY_MODEL_0fd76df09026425d8295bb3a4582306f"
}
},
"3e04b8a847f849aa8882a82328456fe9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_fe663c0c6acd412d9c53ee042b529b15",
"placeholder": "​",
"style": "IPY_MODEL_e1cf6cf1869643ac849b8087afabf28a",
"value": "modules.json: 100%"
}
},
"c6b0ee0554814433a2e9d017be0b4764": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c9cf1e9be6fe4f3d992a7171e64aab04",
"max": 349,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_adef47740fdd405ab23f86097e63c943",
"value": 349
}
},
"342c742d0428448ab0915fa9acc0a8b3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_321d62c42afb4b7d82ca0220e10fcb51",
"placeholder": "​",
"style": "IPY_MODEL_6996b076a0d248988bdbbedd63bfbb1a",
"value": " 349/349 [00:00<00:00, 9.51kB/s]"
}
},
"0fd76df09026425d8295bb3a4582306f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"fe663c0c6acd412d9c53ee042b529b15": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e1cf6cf1869643ac849b8087afabf28a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"c9cf1e9be6fe4f3d992a7171e64aab04": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"adef47740fdd405ab23f86097e63c943": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"321d62c42afb4b7d82ca0220e10fcb51": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6996b076a0d248988bdbbedd63bfbb1a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"68bb25b4b34f45b1b96fd0aaa2bbe9e1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_5a3785fd5fd6491dbb9b3f648031250d",
"IPY_MODEL_2cab8011ddae4c5395103a49ffa26525",
"IPY_MODEL_1906d6e2a54e4e3fbcfa9f5e5fcef881"
],
"layout": "IPY_MODEL_d091822808bc4a65baf6a4e9209e482b"
}
},
"5a3785fd5fd6491dbb9b3f648031250d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_98dd823fd06544ce883462c3a35ad211",
"placeholder": "​",
"style": "IPY_MODEL_a71549cb8128458cbe9670bde0d40340",
"value": "config_sentence_transformers.json: 100%"
}
},
"2cab8011ddae4c5395103a49ffa26525": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b28da7e2dd4b402ebe28222dad7a4597",
"max": 116,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_3b0b22781d8b4fc2984b31eb787ae37e",
"value": 116
}
},
"1906d6e2a54e4e3fbcfa9f5e5fcef881": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_de59ad549fc44c03b2ff70f70516d2d2",
"placeholder": "​",
"style": "IPY_MODEL_8210236f97e846b3b6d40da412b833d1",
"value": " 116/116 [00:00<00:00, 5.49kB/s]"
}
},
"d091822808bc4a65baf6a4e9209e482b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"98dd823fd06544ce883462c3a35ad211": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a71549cb8128458cbe9670bde0d40340": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b28da7e2dd4b402ebe28222dad7a4597": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3b0b22781d8b4fc2984b31eb787ae37e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"de59ad549fc44c03b2ff70f70516d2d2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8210236f97e846b3b6d40da412b833d1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"a8fb7a2bcc754e6fbbaba30c13e345c4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_4a98c8721f2646628ca96a94f9b5598b",
"IPY_MODEL_988b4b19d4c54748a95608d67dcfd61f",
"IPY_MODEL_dd2d12ffa826440f83c6a81d9ec35ea4"
],
"layout": "IPY_MODEL_cc0747fa80ab4f79a2f2955bc7aea90b"
}
},
"4a98c8721f2646628ca96a94f9b5598b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_88811a26e491419d972228482880accd",
"placeholder": "​",
"style": "IPY_MODEL_ecb73cff909a4ca0a1a652998c33bb39",
"value": "README.md: 100%"
}
},
"988b4b19d4c54748a95608d67dcfd61f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f4818e774c354911bcd1ed45a1a70eb5",
"max": 10659,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_45aca61546444d84b30d8d34672ad36d",
"value": 10659
}
},
"dd2d12ffa826440f83c6a81d9ec35ea4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1254d5f25b744fe7bc4623a3054300ef",
"placeholder": "​",
"style": "IPY_MODEL_d5c2c1c3f8944ed7bbb1da12085d1d73",
"value": " 10.7k/10.7k [00:00<00:00, 613kB/s]"
}
},
"cc0747fa80ab4f79a2f2955bc7aea90b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"88811a26e491419d972228482880accd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ecb73cff909a4ca0a1a652998c33bb39": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f4818e774c354911bcd1ed45a1a70eb5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"45aca61546444d84b30d8d34672ad36d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"1254d5f25b744fe7bc4623a3054300ef": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d5c2c1c3f8944ed7bbb1da12085d1d73": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"464af6b4815a45ee81a075d1bc831a23": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_52fdd1acbac042fdaf34e3018fcb63c5",
"IPY_MODEL_ec97ae5541194b3e9804830b3af2109e",
"IPY_MODEL_88014bdf674c41bc88f338caf0189357"
],
"layout": "IPY_MODEL_9ba782f93ad544aa8929edcb9cccddae"
}
},
"52fdd1acbac042fdaf34e3018fcb63c5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ff9e1e330a474c8f822596082009394f",
"placeholder": "​",
"style": "IPY_MODEL_e9eabee64f6d4a3ca36506d0aabd2a1b",
"value": "sentence_bert_config.json: 100%"
}
},
"ec97ae5541194b3e9804830b3af2109e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_85adc31594a34bdd91a403bab6728cc2",
"max": 53,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_9a09ae48970a4426a771d079b7711cfd",
"value": 53
}
},
"88014bdf674c41bc88f338caf0189357": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_67417f29210d499da41731b0cf6265f1",
"placeholder": "​",
"style": "IPY_MODEL_95e7360807b74c02a7ab5958e9dd33ae",
"value": " 53.0/53.0 [00:00<00:00, 1.63kB/s]"
}
},
"9ba782f93ad544aa8929edcb9cccddae": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ff9e1e330a474c8f822596082009394f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e9eabee64f6d4a3ca36506d0aabd2a1b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"85adc31594a34bdd91a403bab6728cc2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9a09ae48970a4426a771d079b7711cfd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"67417f29210d499da41731b0cf6265f1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"95e7360807b74c02a7ab5958e9dd33ae": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"0642f353f91f404f89d3a8366e34036d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_2152db58b19c4a7d9da12c20b9975cb1",
"IPY_MODEL_038ad9a0be974cec9e8327cec84ea956",
"IPY_MODEL_d9b2c54a132e4cff8384b5a1da119cd8"
],
"layout": "IPY_MODEL_c1c47bee3a5c4efeaab120a3b67f40f0"
}
},
"2152db58b19c4a7d9da12c20b9975cb1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b80a07ecf2d84f42b6b05a96959dbce7",
"placeholder": "​",
"style": "IPY_MODEL_36c4adfe9a2447199ff49f8ec1a2ad79",
"value": "config.json: 100%"
}
},
"038ad9a0be974cec9e8327cec84ea956": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_583814f060d64fa4bf5ffccff223beaa",
"max": 612,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_5fcfc7ee7b784f1483b3ba2372901837",
"value": 612
}
},
"d9b2c54a132e4cff8384b5a1da119cd8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f69b57872d3542f697463dcc6138093c",
"placeholder": "​",
"style": "IPY_MODEL_646001c640654c1bb13f116a71e2fe12",
"value": " 612/612 [00:00<00:00, 32.6kB/s]"
}
},
"c1c47bee3a5c4efeaab120a3b67f40f0": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b80a07ecf2d84f42b6b05a96959dbce7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"36c4adfe9a2447199ff49f8ec1a2ad79": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"583814f060d64fa4bf5ffccff223beaa": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5fcfc7ee7b784f1483b3ba2372901837": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"f69b57872d3542f697463dcc6138093c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"646001c640654c1bb13f116a71e2fe12": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"d240ae77302e44fa8d8cefd41db7070a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_87c2f03a634d4884a84957e4a9732921",
"IPY_MODEL_d067d3741ebf4f44b9cc97544d037be8",
"IPY_MODEL_a381d81125a14b82ac09507b632ce0d9"
],
"layout": "IPY_MODEL_cd53914499cd474e8939bc8336f83f47"
}
},
"87c2f03a634d4884a84957e4a9732921": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1875ecf498964c61a68c7a22511b76d5",
"placeholder": "​",
"style": "IPY_MODEL_464caf8d8cfc444a9325e8444ab66b48",
"value": "model.safetensors: 100%"
}
},
"d067d3741ebf4f44b9cc97544d037be8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_e9bd14368db34cd8a88495e11f4a3011",
"max": 90868376,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_1da3d016cfde44bfa9134141c3b85f63",
"value": 90868376
}
},
"a381d81125a14b82ac09507b632ce0d9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b42f35766f9b45b08cce9c87d61227f8",
"placeholder": "​",
"style": "IPY_MODEL_c54b0b0e92e943b48a6ffa626686613d",
"value": " 90.9M/90.9M [00:00<00:00, 117MB/s]"
}
},
"cd53914499cd474e8939bc8336f83f47": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1875ecf498964c61a68c7a22511b76d5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"464caf8d8cfc444a9325e8444ab66b48": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"e9bd14368db34cd8a88495e11f4a3011": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1da3d016cfde44bfa9134141c3b85f63": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"b42f35766f9b45b08cce9c87d61227f8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c54b0b0e92e943b48a6ffa626686613d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"11cc039c692a4d758c49491690791a71": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_fa2eb2e441594545a2f030277a80fbd6",
"IPY_MODEL_ac42d3d26fec4b028d36860881041313",
"IPY_MODEL_c6a507c3efcd4a4d8cfbda3a888ae44e"
],
"layout": "IPY_MODEL_e680bc875b3f4f42afe5427d80651a59"
}
},
"fa2eb2e441594545a2f030277a80fbd6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_99339defd1a649d6a4a5b961223b88b1",
"placeholder": "​",
"style": "IPY_MODEL_38f5de4afba64d86943f6396872573ab",
"value": "tokenizer_config.json: 100%"
}
},
"ac42d3d26fec4b028d36860881041313": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_63fb7fc9cbe44fe9b8d73f6a07e4d1ea",
"max": 350,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_b396f5203381461cad3ea17b7df1b0e8",
"value": 350
}
},
"c6a507c3efcd4a4d8cfbda3a888ae44e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9eb2324c0d8c4c0bbd968958ee6f4eb1",
"placeholder": "​",
"style": "IPY_MODEL_c6d8d5819e5e4f4885ba207c2d938484",
"value": " 350/350 [00:00<00:00, 17.7kB/s]"
}
},
"e680bc875b3f4f42afe5427d80651a59": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"99339defd1a649d6a4a5b961223b88b1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"38f5de4afba64d86943f6396872573ab": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"63fb7fc9cbe44fe9b8d73f6a07e4d1ea": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b396f5203381461cad3ea17b7df1b0e8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"9eb2324c0d8c4c0bbd968958ee6f4eb1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c6d8d5819e5e4f4885ba207c2d938484": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"e6d2cc38c0514debac750dbb4e2cdcf4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_ab6575c1686e439d8c55c6d9b4a5a05d",
"IPY_MODEL_24b2327e585b458983228e4e89c371b6",
"IPY_MODEL_a38ce152abc7491d9364dd662e3f600f"
],
"layout": "IPY_MODEL_346632361133481090e7a7fda6e3528c"
}
},
"ab6575c1686e439d8c55c6d9b4a5a05d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_7ed83eef6fa04290876741329dda1454",
"placeholder": "​",
"style": "IPY_MODEL_ef2934f3870b44d791bff8ec205fae83",
"value": "vocab.txt: 100%"
}
},
"24b2327e585b458983228e4e89c371b6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_fc10cd7dbfe64a81aaf70320564be57e",
"max": 231508,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_f035a56a9fdc427eb46f7d14a21622da",
"value": 231508
}
},
"a38ce152abc7491d9364dd662e3f600f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_810ab543ede545ad81cb2d80279b1cde",
"placeholder": "​",
"style": "IPY_MODEL_eab7510912c34ac29e3e9c6ab8267b80",
"value": " 232k/232k [00:00<00:00, 2.76MB/s]"
}
},
"346632361133481090e7a7fda6e3528c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7ed83eef6fa04290876741329dda1454": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ef2934f3870b44d791bff8ec205fae83": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"fc10cd7dbfe64a81aaf70320564be57e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f035a56a9fdc427eb46f7d14a21622da": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"810ab543ede545ad81cb2d80279b1cde": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"eab7510912c34ac29e3e9c6ab8267b80": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9d2c0151b5064794b022bcdb0da8ad98": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_862e5eca29dc4706a875e021c2fc23aa",
"IPY_MODEL_95cfc0e17f284ff3885eeda47692d17c",
"IPY_MODEL_44254edada204abb91aa7404372440c4"
],
"layout": "IPY_MODEL_bfa46c8e2c8248889b9762b26d0fd28e"
}
},
"862e5eca29dc4706a875e021c2fc23aa": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_545ab413f0c742129ab52146358a5319",
"placeholder": "​",
"style": "IPY_MODEL_61877f825a7e42c7bb49c07d59f7daaf",
"value": "tokenizer.json: 100%"
}
},
"95cfc0e17f284ff3885eeda47692d17c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_49e26c886e094ca7aa2f19863c024b1f",
"max": 466247,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_16f410cd5beb4f06a6f2956ea7c0e125",
"value": 466247
}
},
"44254edada204abb91aa7404372440c4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_24478c3a405d438c882385e5ae4c2edf",
"placeholder": "​",
"style": "IPY_MODEL_6da110acfe6743a5a4243dc42f5e94f1",
"value": " 466k/466k [00:00<00:00, 10.3MB/s]"
}
},
"bfa46c8e2c8248889b9762b26d0fd28e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"545ab413f0c742129ab52146358a5319": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"61877f825a7e42c7bb49c07d59f7daaf": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"49e26c886e094ca7aa2f19863c024b1f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"16f410cd5beb4f06a6f2956ea7c0e125": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"24478c3a405d438c882385e5ae4c2edf": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6da110acfe6743a5a4243dc42f5e94f1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9cfe55a04c1843d7827209e331d49dc0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_af044bda42724154848cb2a9a7b0794a",
"IPY_MODEL_86878eb3fe6447da9c2a2517ceb96b0c",
"IPY_MODEL_8f555fec379c4b14821e77b66025394c"
],
"layout": "IPY_MODEL_6bf185deb7944d57b858a03c9ba6077a"
}
},
"af044bda42724154848cb2a9a7b0794a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b24ccccdd04c455bbfedc9fa217b2dd6",
"placeholder": "​",
"style": "IPY_MODEL_36b99af7f0c94413b2440744bb7c5fa2",
"value": "special_tokens_map.json: 100%"
}
},
"86878eb3fe6447da9c2a2517ceb96b0c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3089bcfee22544759b4613b3b6037aca",
"max": 112,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_624caa8e6ad64ab28753ec87dafe8dd9",
"value": 112
}
},
"8f555fec379c4b14821e77b66025394c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_67df1bb534d640668e657cf64d577889",
"placeholder": "​",
"style": "IPY_MODEL_74d4f5bf52184dbca271b406e0761391",
"value": " 112/112 [00:00<00:00, 3.67kB/s]"
}
},
"6bf185deb7944d57b858a03c9ba6077a": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b24ccccdd04c455bbfedc9fa217b2dd6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"36b99af7f0c94413b2440744bb7c5fa2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3089bcfee22544759b4613b3b6037aca": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"624caa8e6ad64ab28753ec87dafe8dd9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"67df1bb534d640668e657cf64d577889": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"74d4f5bf52184dbca271b406e0761391": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"efeda61fa58a450aa8fe23f8c5782b4d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_496a55c53170450786b0d5ecb68a9453",
"IPY_MODEL_5a310c918bc94d76978eb88aaca8eecc",
"IPY_MODEL_f2d7138a21b746298a629c20e73581c3"
],
"layout": "IPY_MODEL_2fabdd86911b42cea93768be87749c49"
}
},
"496a55c53170450786b0d5ecb68a9453": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_7c6f07b95f754a6eb9b55eba102a6ef4",
"placeholder": "​",
"style": "IPY_MODEL_a17db543e2664b1ba1190f1278b0481b",
"value": "1_Pooling/config.json: 100%"
}
},
"5a310c918bc94d76978eb88aaca8eecc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_90fb752a27c1475eb23ff6efe60f9a29",
"max": 190,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_cef3f2a728d14d42b48a81f652908f25",
"value": 190
}
},
"f2d7138a21b746298a629c20e73581c3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9975ff3b5517426d881e0f8770d83d89",
"placeholder": "​",
"style": "IPY_MODEL_f9ba9aecf1204157a8d8d343251c25dc",
"value": " 190/190 [00:00<00:00, 7.41kB/s]"
}
},
"2fabdd86911b42cea93768be87749c49": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7c6f07b95f754a6eb9b55eba102a6ef4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a17db543e2664b1ba1190f1278b0481b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"90fb752a27c1475eb23ff6efe60f9a29": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cef3f2a728d14d42b48a81f652908f25": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"9975ff3b5517426d881e0f8770d83d89": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f9ba9aecf1204157a8d8d343251c25dc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/JonathanLoscalzo/94caa68519f6ef76b871e14f765abf84/llm_zoomcamp_rag-homework.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# Intro dlt -> LanceDB loading example"
],
"metadata": {
"id": "Y2-47Y87jwW_"
}
},
{
"cell_type": "markdown",
"source": [
"https://lu.ma/cnpdoc5n\n",
"\n",
"If you want to play around with this notebook and make edits in the future, we highly recommend making a copy, since the link is view-only! Also, make sure you're signed in with your Google account so that you can add secrets.\n",
"\n",
"Before going into a more complex example, we will go through a simple example of how to load the course Q&A data into LanceDB."
],
"metadata": {
"id": "hjkkr8_UH0K_"
}
},
{
"cell_type": "markdown",
"source": [
"## Install requirements"
],
"metadata": {
"id": "K3VvFlhSbRYx"
}
},
{
"cell_type": "markdown",
"source": [
"To create a JSON -> LanceDB pipeline, we need to install:\n",
"1. dlt with the `lancedb` extra\n",
"2. sentence-transformers: we need an embedding model to vectorize the data before storing it inside LanceDB. For this, we choose the open-source model \"sentence-transformers/all-MiniLM-L6-v2\"."
],
"metadata": {
"id": "OSlHmqELbQHI"
}
},
{
"cell_type": "code",
"source": [
"%%capture\n",
"!pip install dlt[lancedb]==0.5.1a0\n",
"!pip install sentence-transformers"
],
"metadata": {
"id": "vcQ6QseXKSHX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Load the data"
],
"metadata": {
"id": "roJHcRy0bW95"
}
},
{
"cell_type": "markdown",
"source": [
"We'll first load the data into LanceDB as-is, without embedding it. LanceDB stores both the data and the embeddings, and can also embed data and queries on the fly.\n",
"\n",
"Some definitions:\n",
"* A dlt **source** is a grouping of **resources** (e.g. all your data from HubSpot)\n",
"* A dlt **resource** is a function that yields data (e.g. a function yielding all your HubSpot companies)\n",
"* A dlt **pipeline** is how you ingest your data\n",
"\n",
"Loading the data consists of a few steps:\n",
"1. Use the requests library to get the data\n",
"2. Define a dlt resource that yields the individual documents\n",
"3. Create a dlt pipeline and run it"
],
"metadata": {
"id": "c5-X9owIbsAL"
}
},
{
"cell_type": "code",
"source": [
"!rm -R -d ./.lancedb"
],
"metadata": {
"id": "xrt-4J1u88Wu",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5bbdd361-d468-465f-a217-a838636707b0"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"rm: cannot remove './.lancedb': No such file or directory\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import requests\n",
"import dlt\n",
"\n",
"qa_dataset = requests.get(\"https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1\").json()\n",
"\n",
"@dlt.resource\n",
"def qa_documents():\n",
" for course in qa_dataset:\n",
" yield course[\"documents\"]\n",
"\n",
"pipeline = dlt.pipeline(pipeline_name=\"from_json\", destination=\"lancedb\", dataset_name=\"qanda\")\n",
"\n",
"load_info = pipeline.run(qa_documents, table_name=\"documents\")\n",
"\n",
"print(load_info)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_CTfhhFGJ-ma",
"outputId": "f645b4ac-bedd-4581-a23c-8a9a29ba0b9a"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"_dlt_pipeline_state\n",
"[{'name': 'version', 'data_type': 'bigint', 'nullable': False}, {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, {'name': 'pipeline_name', 'data_type': 'text', 'nullable': False}, {'name': 'state', 'data_type': 'text', 'nullable': False}, {'name': 'created_at', 'data_type': 'timestamp', 'nullable': False}, {'name': 'version_hash', 'data_type': 'text', 'nullable': True}, {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True}]\n",
"_dlt_version\n",
"[{'name': 'version', 'data_type': 'bigint', 'nullable': False}, {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, {'name': 'schema_name', 'data_type': 'text', 'nullable': False}, {'name': 'version_hash', 'data_type': 'text', 'nullable': False}, {'name': 'schema', 'data_type': 'text', 'nullable': False}]\n",
"documents\n",
"[{'name': 'text', 'data_type': 'text', 'nullable': True}, {'name': 'section', 'data_type': 'text', 'nullable': True}, {'name': 'question', 'data_type': 'text', 'nullable': True}, {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True}]\n",
"_dlt_loads\n",
"[{'name': 'load_id', 'data_type': 'text', 'nullable': False}, {'name': 'schema_name', 'data_type': 'text', 'nullable': True}, {'name': 'status', 'data_type': 'bigint', 'nullable': False}, {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, {'name': 'schema_version_hash', 'data_type': 'text', 'nullable': True}]\n",
"UPLOAD\n",
"Pipeline from_json load step completed in 0.71 seconds\n",
"1 load package(s) were loaded to destination LanceDB and into dataset qanda\n",
"The LanceDB destination used <dlt.destinations.impl.lancedb.configuration.LanceDBCredentials object at 0x7e1a1d624a30> location to store data\n",
"Load package 1721160120.0989413 is LOADED and contains no failed jobs\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import lancedb\n",
"\n",
"db = lancedb.connect(\"./.lancedb\")\n",
"print(db.table_names())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pOuo4R7XNH9y",
"outputId": "3fa339b9-1127-42c8-fb20-87fbaf14a09d"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['qanda____dlt_loads', 'qanda____dlt_pipeline_state', 'qanda____dlt_version', 'qanda___dltSentinelTable', 'qanda___documents']\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"db_table = db.open_table(\"qanda___documents\")\n",
"db_table.to_pandas()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 597
},
"id": "PQwiUyt_Pb_H",
"outputId": "457b0d46-d1f5-4990-c927-f19194a36628"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" id__ \\\n",
"0 ac06f7c3-457e-5168-a018-15c9e8331b98 \n",
"1 ea60520b-d7ce-5030-bcb0-ca2da3544803 \n",
"2 8a306d3f-d207-5bc8-a073-4eaf2bafdd5b \n",
"3 66022dfd-6e8b-51a8-9e4e-e9f187d17ac1 \n",
"4 80c3836d-2ffc-5f9f-b45e-61926e750324 \n",
".. ... \n",
"943 e96c5d93-7dfb-5516-8c3a-66e4e24ff732 \n",
"944 137e96ec-9e57-5f0d-80b2-604d11c388d7 \n",
"945 0a8593db-dfca-5c3c-a77d-6b683f174824 \n",
"946 9edb23b0-4721-5db5-8ca7-3e609e7d1dfd \n",
"947 314fc430-a743-5f97-bfc3-2e94e767b25b \n",
"\n",
" text \\\n",
"0 The purpose of this document is to capture fre... \n",
"1 GitHub - DataTalksClub data-engineering-zoomca... \n",
"2 Yes, even if you don't register, you're still ... \n",
"3 You don't need it. You're accepted. You can al... \n",
"4 You can start by installing and setting up all... \n",
".. ... \n",
"943 Problem description\\nThis is the step in the c... \n",
"944 Problem description\\nWhen a docker-compose fil... \n",
"945 Problem description\\nIf you are having problem... \n",
"946 Problem description\\nPre-commit command was fa... \n",
"947 Problem description\\nInfrastructure created in... \n",
"\n",
" section \\\n",
"0 General course-related questions \n",
"1 General course-related questions \n",
"2 General course-related questions \n",
"3 General course-related questions \n",
"4 General course-related questions \n",
".. ... \n",
"943 Module 6: Best practices \n",
"944 Module 6: Best practices \n",
"945 Module 6: Best practices \n",
"946 Module 6: Best practices \n",
"947 Module 6: Best practices \n",
"\n",
" question _dlt_load_id \\\n",
"0 Course - When will the course start? 1721160120.0989413 \n",
"1 Course - What are the prerequisites for this c... 1721160120.0989413 \n",
"2 Course - Can I still join the course after the... 1721160120.0989413 \n",
"3 Course - I have registered for the Data Engine... 1721160120.0989413 \n",
"4 Course - What can I do before the course starts? 1721160120.0989413 \n",
".. ... ... \n",
"943 Github actions: Permission denied error when e... 1721160120.0989413 \n",
"944 Managing Multiple Docker Containers with docke... 1721160120.0989413 \n",
"945 AWS regions need to match docker-compose 1721160120.0989413 \n",
"946 Isort Pre-commit 1721160120.0989413 \n",
"947 How to destroy infrastructure created via GitH... 1721160120.0989413 \n",
"\n",
" _dlt_id \n",
"0 Ot00Huzv/9dfgg \n",
"1 NPVIroNed7hj/w \n",
"2 K1IjDKvD+H7Vxw \n",
"3 gcFNldblqtBfWQ \n",
"4 tf/Bw9Qufo/TaQ \n",
".. ... \n",
"943 Pd/oCcmzGlnKkQ \n",
"944 y48tVAzVriOcsA \n",
"945 8l3+afGA54Fa/Q \n",
"946 MaqLm+XOmcA6wQ \n",
"947 WisAiAPCsj61hg \n",
"\n",
"[948 rows x 6 columns]"
],
"text/html": [
"\n",
" <div id=\"df-fe0ffe44-7929-40ef-973e-c4c8067116a0\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id__</th>\n",
" <th>text</th>\n",
" <th>section</th>\n",
" <th>question</th>\n",
" <th>_dlt_load_id</th>\n",
" <th>_dlt_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ac06f7c3-457e-5168-a018-15c9e8331b98</td>\n",
" <td>The purpose of this document is to capture fre...</td>\n",
" <td>General course-related questions</td>\n",
" <td>Course - When will the course start?</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>Ot00Huzv/9dfgg</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ea60520b-d7ce-5030-bcb0-ca2da3544803</td>\n",
" <td>GitHub - DataTalksClub data-engineering-zoomca...</td>\n",
" <td>General course-related questions</td>\n",
" <td>Course - What are the prerequisites for this c...</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>NPVIroNed7hj/w</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8a306d3f-d207-5bc8-a073-4eaf2bafdd5b</td>\n",
" <td>Yes, even if you don't register, you're still ...</td>\n",
" <td>General course-related questions</td>\n",
" <td>Course - Can I still join the course after the...</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>K1IjDKvD+H7Vxw</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>66022dfd-6e8b-51a8-9e4e-e9f187d17ac1</td>\n",
" <td>You don't need it. You're accepted. You can al...</td>\n",
" <td>General course-related questions</td>\n",
" <td>Course - I have registered for the Data Engine...</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>gcFNldblqtBfWQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>80c3836d-2ffc-5f9f-b45e-61926e750324</td>\n",
" <td>You can start by installing and setting up all...</td>\n",
" <td>General course-related questions</td>\n",
" <td>Course - What can I do before the course starts?</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>tf/Bw9Qufo/TaQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>943</th>\n",
" <td>e96c5d93-7dfb-5516-8c3a-66e4e24ff732</td>\n",
" <td>Problem description\\nThis is the step in the c...</td>\n",
" <td>Module 6: Best practices</td>\n",
" <td>Github actions: Permission denied error when e...</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>Pd/oCcmzGlnKkQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>944</th>\n",
" <td>137e96ec-9e57-5f0d-80b2-604d11c388d7</td>\n",
" <td>Problem description\\nWhen a docker-compose fil...</td>\n",
" <td>Module 6: Best practices</td>\n",
" <td>Managing Multiple Docker Containers with docke...</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>y48tVAzVriOcsA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>945</th>\n",
" <td>0a8593db-dfca-5c3c-a77d-6b683f174824</td>\n",
" <td>Problem description\\nIf you are having problem...</td>\n",
" <td>Module 6: Best practices</td>\n",
" <td>AWS regions need to match docker-compose</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>8l3+afGA54Fa/Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>946</th>\n",
" <td>9edb23b0-4721-5db5-8ca7-3e609e7d1dfd</td>\n",
" <td>Problem description\\nPre-commit command was fa...</td>\n",
" <td>Module 6: Best practices</td>\n",
" <td>Isort Pre-commit</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>MaqLm+XOmcA6wQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>947</th>\n",
" <td>314fc430-a743-5f97-bfc3-2e94e767b25b</td>\n",
" <td>Problem description\\nInfrastructure created in...</td>\n",
" <td>Module 6: Best practices</td>\n",
" <td>How to destroy infrastructure created via GitH...</td>\n",
" <td>1721160120.0989413</td>\n",
" <td>WisAiAPCsj61hg</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>948 rows × 6 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-fe0ffe44-7929-40ef-973e-c4c8067116a0')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-fe0ffe44-7929-40ef-973e-c4c8067116a0 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-fe0ffe44-7929-40ef-973e-c4c8067116a0');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-d36d6e7a-5305-42fc-aadd-64a8c2ce0e79\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d36d6e7a-5305-42fc-aadd-64a8c2ce0e79')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-d36d6e7a-5305-42fc-aadd-64a8c2ce0e79 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"db_table\",\n \"rows\": 948,\n \"fields\": [\n {\n \"column\": \"id__\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 948,\n \"samples\": [\n \"c97ecb35-c3a6-57b6-8977-675ab8274a1a\",\n \"9d7c33ab-42fb-5053-9c90-d80f19ece8fa\",\n \"4439c17d-3543-5928-9052-1a2e18e2076d\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 945,\n \"samples\": [\n \"Problem: The output of DictVectorizer was taking up too much memory. So much so, that I couldn\\u2019t even fit the linear regression model before running out of memory on my 16 GB machine.\\nSolution: In the example for DictVectorizer in the scikit-learn website, they set the parameter \\u201csparse\\u201d as False. Although this helps with viewing the results, this results in a lot of memory usage. The solution is to either use \\u201csparse=True\\u201d instead, or leave it at the default which is also True.\\nAhmed Fahim (afahim03@yahoo.com)\",\n \"Link to Slack Thread : has anyone figured out how to read from GCP data lake instead of downloading all the taxi data again?\\nThere\\u2019s a few extra steps to go into reading from GCS with PySpark\\n1.) IMPORTANT: Download the Cloud Storage connector for Hadoop here: https://cloud.google.com/dataproc/docs/concepts/connectors/cloud-storage#clusters\\nAs the name implies, this .jar file is what essentially connects PySpark with your GCS\\n2.) Move the .jar file to your Spark file directory. I installed Spark using homebrew on my MacOS machine and I had to create a /jars directory under \\\"/opt/homebrew/Cellar/apache-spark/3.2.1/ (where my spark dir is located)\\n3.) In your Python script, there are a few extra classes you\\u2019ll have to import:\\nimport pyspark\\nfrom pyspark.sql import SparkSession\\nfrom pyspark.conf import SparkConf\\nfrom pyspark.context import SparkContext\\n4.) 
You must set up your configurations before building your SparkSession. Here\\u2019s my code snippet:\\nconf = SparkConf() \\\\\\n.setMaster('local[*]') \\\\\\n.setAppName('test') \\\\\\n.set(\\\"spark.jars\\\", \\\"/opt/homebrew/Cellar/apache-spark/3.2.1/jars/gcs-connector-hadoop3-latest.jar\\\") \\\\\\n.set(\\\"spark.hadoop.google.cloud.auth.service.account.enable\\\", \\\"true\\\") \\\\\\n.set(\\\"spark.hadoop.google.cloud.auth.service.account.json.keyfile\\\", \\\"path/to/google_credentials.json\\\")\\nsc = SparkContext(conf=conf)\\nsc._jsc.hadoopConfiguration().set(\\\"fs.AbstractFileSystem.gs.impl\\\", \\\"com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS\\\")\\nsc._jsc.hadoopConfiguration().set(\\\"fs.gs.impl\\\", \\\"com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem\\\")\\nsc._jsc.hadoopConfiguration().set(\\\"fs.gs.auth.service.account.json.keyfile\\\", \\\"path/to/google_credentials.json\\\")\\nsc._jsc.hadoopConfiguration().set(\\\"fs.gs.auth.service.account.enable\\\", \\\"true\\\")\\n5.) Once you run that, build your SparkSession with the new parameters we\\u2019d just instantiated in the previous step:\\nspark = SparkSession.builder \\\\\\n.config(conf=sc.getConf()) \\\\\\n.getOrCreate()\\n6.) Finally, you\\u2019re able to read your files straight from GCS!\\ndf_green = spark.read.parquet(\\\"gs://{BUCKET}/green/202*/\\\")\",\n \"1. Go to your dbt cloud service account\\n1. Adding the [Storage Object Admin,Storage Admin] role in addition tco BigQuery Admin.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"section\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 32,\n \"samples\": [\n \"Module 4: Deployment\",\n \"3. 
Machine Learning for Classification\",\n \"Miscellaneous\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 943,\n \"samples\": [\n \"WSL - Permissions too open at Windows\",\n \"Why do we need the Staging dataset?\",\n \"What if my answer is not exactly the same as the choices presented?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_dlt_load_id\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"1721160120.0989413\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_dlt_id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 948,\n \"samples\": [\n \"Oq4redaQCER2wQ\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 5
}
]
},
{
"cell_type": "markdown",
"source": [
"## Load and embed the data"
],
"metadata": {
"id": "s5vK8EMfbfs2"
}
},
{
"cell_type": "markdown",
"source": [
"Now we load the same data again (into a new table), but embed it directly with the `lancedb_adapter`. This consists of the following steps:\n",
"\n",
"1. Define the embedding model to use via ENV variables\n",
"2. Define a new pipeline to load the same data and embed the \"text\" and \"question\" columns with the `lancedb_adapter`\n",
"\n",
"You can use any embedding model, from open source to OpenAI. We've chosen the [`all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) sentence transformer for speed and simplicty.\n",
"\n",
"Note: this pipeline runs slightly longer because it has to download the model and embed the data."
],
"metadata": {
"id": "aOXAMNAzigD7"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from dlt.destinations.adapters import lancedb_adapter\n",
"\n",
"os.environ[\"DESTINATION__LANCEDB__EMBEDDING_MODEL_PROVIDER\"] = \"sentence-transformers\"\n",
"os.environ[\"DESTINATION__LANCEDB__EMBEDDING_MODEL\"] = \"all-MiniLM-L6-v2\"\n",
"\n",
"pipeline = dlt.pipeline(pipeline_name=\"from_json_embedded\", destination=\"lancedb\", dataset_name=\"qanda_embedded\")\n",
"\n",
"load_info = pipeline.run(lancedb_adapter(qa_documents, embed=[\"text\", \"question\"]), table_name=\"documents\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 684,
"referenced_widgets": [
"2cd8a64aa01f4af8be4ca4796bc36218",
"3e04b8a847f849aa8882a82328456fe9",
"c6b0ee0554814433a2e9d017be0b4764",
"342c742d0428448ab0915fa9acc0a8b3",
"0fd76df09026425d8295bb3a4582306f",
"fe663c0c6acd412d9c53ee042b529b15",
"e1cf6cf1869643ac849b8087afabf28a",
"c9cf1e9be6fe4f3d992a7171e64aab04",
"adef47740fdd405ab23f86097e63c943",
"321d62c42afb4b7d82ca0220e10fcb51",
"6996b076a0d248988bdbbedd63bfbb1a",
"68bb25b4b34f45b1b96fd0aaa2bbe9e1",
"5a3785fd5fd6491dbb9b3f648031250d",
"2cab8011ddae4c5395103a49ffa26525",
"1906d6e2a54e4e3fbcfa9f5e5fcef881",
"d091822808bc4a65baf6a4e9209e482b",
"98dd823fd06544ce883462c3a35ad211",
"a71549cb8128458cbe9670bde0d40340",
"b28da7e2dd4b402ebe28222dad7a4597",
"3b0b22781d8b4fc2984b31eb787ae37e",
"de59ad549fc44c03b2ff70f70516d2d2",
"8210236f97e846b3b6d40da412b833d1",
"a8fb7a2bcc754e6fbbaba30c13e345c4",
"4a98c8721f2646628ca96a94f9b5598b",
"988b4b19d4c54748a95608d67dcfd61f",
"dd2d12ffa826440f83c6a81d9ec35ea4",
"cc0747fa80ab4f79a2f2955bc7aea90b",
"88811a26e491419d972228482880accd",
"ecb73cff909a4ca0a1a652998c33bb39",
"f4818e774c354911bcd1ed45a1a70eb5",
"45aca61546444d84b30d8d34672ad36d",
"1254d5f25b744fe7bc4623a3054300ef",
"d5c2c1c3f8944ed7bbb1da12085d1d73",
"464af6b4815a45ee81a075d1bc831a23",
"52fdd1acbac042fdaf34e3018fcb63c5",
"ec97ae5541194b3e9804830b3af2109e",
"88014bdf674c41bc88f338caf0189357",
"9ba782f93ad544aa8929edcb9cccddae",
"ff9e1e330a474c8f822596082009394f",
"e9eabee64f6d4a3ca36506d0aabd2a1b",
"85adc31594a34bdd91a403bab6728cc2",
"9a09ae48970a4426a771d079b7711cfd",
"67417f29210d499da41731b0cf6265f1",
"95e7360807b74c02a7ab5958e9dd33ae",
"0642f353f91f404f89d3a8366e34036d",
"2152db58b19c4a7d9da12c20b9975cb1",
"038ad9a0be974cec9e8327cec84ea956",
"d9b2c54a132e4cff8384b5a1da119cd8",
"c1c47bee3a5c4efeaab120a3b67f40f0",
"b80a07ecf2d84f42b6b05a96959dbce7",
"36c4adfe9a2447199ff49f8ec1a2ad79",
"583814f060d64fa4bf5ffccff223beaa",
"5fcfc7ee7b784f1483b3ba2372901837",
"f69b57872d3542f697463dcc6138093c",
"646001c640654c1bb13f116a71e2fe12",
"d240ae77302e44fa8d8cefd41db7070a",
"87c2f03a634d4884a84957e4a9732921",
"d067d3741ebf4f44b9cc97544d037be8",
"a381d81125a14b82ac09507b632ce0d9",
"cd53914499cd474e8939bc8336f83f47",
"1875ecf498964c61a68c7a22511b76d5",
"464caf8d8cfc444a9325e8444ab66b48",
"e9bd14368db34cd8a88495e11f4a3011",
"1da3d016cfde44bfa9134141c3b85f63",
"b42f35766f9b45b08cce9c87d61227f8",
"c54b0b0e92e943b48a6ffa626686613d",
"11cc039c692a4d758c49491690791a71",
"fa2eb2e441594545a2f030277a80fbd6",
"ac42d3d26fec4b028d36860881041313",
"c6a507c3efcd4a4d8cfbda3a888ae44e",
"e680bc875b3f4f42afe5427d80651a59",
"99339defd1a649d6a4a5b961223b88b1",
"38f5de4afba64d86943f6396872573ab",
"63fb7fc9cbe44fe9b8d73f6a07e4d1ea",
"b396f5203381461cad3ea17b7df1b0e8",
"9eb2324c0d8c4c0bbd968958ee6f4eb1",
"c6d8d5819e5e4f4885ba207c2d938484",
"e6d2cc38c0514debac750dbb4e2cdcf4",
"ab6575c1686e439d8c55c6d9b4a5a05d",
"24b2327e585b458983228e4e89c371b6",
"a38ce152abc7491d9364dd662e3f600f",
"346632361133481090e7a7fda6e3528c",
"7ed83eef6fa04290876741329dda1454",
"ef2934f3870b44d791bff8ec205fae83",
"fc10cd7dbfe64a81aaf70320564be57e",
"f035a56a9fdc427eb46f7d14a21622da",
"810ab543ede545ad81cb2d80279b1cde",
"eab7510912c34ac29e3e9c6ab8267b80",
"9d2c0151b5064794b022bcdb0da8ad98",
"862e5eca29dc4706a875e021c2fc23aa",
"95cfc0e17f284ff3885eeda47692d17c",
"44254edada204abb91aa7404372440c4",
"bfa46c8e2c8248889b9762b26d0fd28e",
"545ab413f0c742129ab52146358a5319",
"61877f825a7e42c7bb49c07d59f7daaf",
"49e26c886e094ca7aa2f19863c024b1f",
"16f410cd5beb4f06a6f2956ea7c0e125",
"24478c3a405d438c882385e5ae4c2edf",
"6da110acfe6743a5a4243dc42f5e94f1",
"9cfe55a04c1843d7827209e331d49dc0",
"af044bda42724154848cb2a9a7b0794a",
"86878eb3fe6447da9c2a2517ceb96b0c",
"8f555fec379c4b14821e77b66025394c",
"6bf185deb7944d57b858a03c9ba6077a",
"b24ccccdd04c455bbfedc9fa217b2dd6",
"36b99af7f0c94413b2440744bb7c5fa2",
"3089bcfee22544759b4613b3b6037aca",
"624caa8e6ad64ab28753ec87dafe8dd9",
"67df1bb534d640668e657cf64d577889",
"74d4f5bf52184dbca271b406e0761391",
"efeda61fa58a450aa8fe23f8c5782b4d",
"496a55c53170450786b0d5ecb68a9453",
"5a310c918bc94d76978eb88aaca8eecc",
"f2d7138a21b746298a629c20e73581c3",
"2fabdd86911b42cea93768be87749c49",
"7c6f07b95f754a6eb9b55eba102a6ef4",
"a17db543e2664b1ba1190f1278b0481b",
"90fb752a27c1475eb23ff6efe60f9a29",
"cef3f2a728d14d42b48a81f652908f25",
"9975ff3b5517426d881e0f8770d83d89",
"f9ba9aecf1204157a8d8d343251c25dc"
]
},
"id": "zpqhOpmrS45-",
"outputId": "3efc379f-1a1b-4831-b9ac-37fb684f3fba"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"_dlt_pipeline_state\n",
"[{'name': 'version', 'data_type': 'bigint', 'nullable': False}, {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, {'name': 'pipeline_name', 'data_type': 'text', 'nullable': False}, {'name': 'state', 'data_type': 'text', 'nullable': False}, {'name': 'created_at', 'data_type': 'timestamp', 'nullable': False}, {'name': 'version_hash', 'data_type': 'text', 'nullable': True}, {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True}]\n",
"_dlt_version\n",
"[{'name': 'version', 'data_type': 'bigint', 'nullable': False}, {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, {'name': 'schema_name', 'data_type': 'text', 'nullable': False}, {'name': 'version_hash', 'data_type': 'text', 'nullable': False}, {'name': 'schema', 'data_type': 'text', 'nullable': False}]\n",
"documents\n",
"[{'name': 'text', 'x-lancedb-embed': True, 'data_type': 'text', 'nullable': True}, {'name': 'section', 'data_type': 'text', 'nullable': True}, {'name': 'question', 'x-lancedb-embed': True, 'data_type': 'text', 'nullable': True}, {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True}]\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
"The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
"To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
"You will be able to reuse this secret in all of your notebooks.\n",
"Please note that authentication is recommended but still optional to access public models or datasets.\n",
" warnings.warn(\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"modules.json: 0%| | 0.00/349 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "2cd8a64aa01f4af8be4ca4796bc36218"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"config_sentence_transformers.json: 0%| | 0.00/116 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "68bb25b4b34f45b1b96fd0aaa2bbe9e1"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"README.md: 0%| | 0.00/10.7k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "a8fb7a2bcc754e6fbbaba30c13e345c4"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"sentence_bert_config.json: 0%| | 0.00/53.0 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "464af6b4815a45ee81a075d1bc831a23"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
" warnings.warn(\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"config.json: 0%| | 0.00/612 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "0642f353f91f404f89d3a8366e34036d"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"model.safetensors: 0%| | 0.00/90.9M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "d240ae77302e44fa8d8cefd41db7070a"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/350 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "11cc039c692a4d758c49491690791a71"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "e6d2cc38c0514debac750dbb4e2cdcf4"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9d2c0151b5064794b022bcdb0da8ad98"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/112 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "9cfe55a04c1843d7827209e331d49dc0"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"1_Pooling/config.json: 0%| | 0.00/190 [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "efeda61fa58a450aa8fe23f8c5782b4d"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"_dlt_loads\n",
"[{'name': 'load_id', 'data_type': 'text', 'nullable': False}, {'name': 'schema_name', 'data_type': 'text', 'nullable': True}, {'name': 'status', 'data_type': 'bigint', 'nullable': False}, {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, {'name': 'schema_version_hash', 'data_type': 'text', 'nullable': True}]\n",
"UPLOAD\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"db = lancedb.connect(\"./.lancedb\")"
],
"metadata": {
"id": "BnOvwU8kTBCL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"db_table = db.open_table(\"qanda_embedded___documents\")"
],
"metadata": {
"id": "lp4C-2h4THdK"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"That's all for this intro example! The DB could now be used as a basis for a RAG."
],
"metadata": {
"id": "L4yyirj_kI7h"
}
},
{
"cell_type": "markdown",
"source": [
"# Create an up-to-date RAG with dlt and LanceDB"
],
"metadata": {
"id": "-OEpbMGNZexo"
}
},
{
"cell_type": "markdown",
"source": [
"**Note on running this notebook**: We are going to download and use a local Ollama instance for the RAG, so preferably select the **T4 GPU** in the runtime when starting this notebook (Runtime > Change runtime type > Hardware accelerator > T4 GPU).\n",
"\n",
"You can also use the default CPU if you run into technical issues, but then your LLM responses might be slower (~2 mins/response)."
],
"metadata": {
"id": "zQUsxggP0tje"
}
},
{
"cell_type": "markdown",
"source": [
"## Part 1: Create a Notion -> LanceDB pipeline using dlt"
],
"metadata": {
"id": "pAGJAVLzZCDn"
}
},
{
"cell_type": "markdown",
"source": [
"### 1. Install requirements"
],
"metadata": {
"id": "b5zDmfFcwS5M"
}
},
{
"cell_type": "markdown",
"source": [
"To create a Notion -> LanceDB pipeline, we need to install:\n",
"1. dlt with lancedb extras\n",
"2. sentence-transformers: we need to use an embedding model to vectorize and store data inside LanceDB. For this we choose the open-source model \"sentence-transformers/all-MiniLM-L6-v2\"."
],
"metadata": {
"id": "aOUpcKhnwggh"
}
},
{
"cell_type": "code",
"source": [
"%%capture\n",
"!pip install dlt[lancedb]==0.5.1a0\n",
"!pip install sentence-transformers\n",
"!yes | dlt init rest_api lancedb"
],
"metadata": {
"id": "4AVm0h1rjv8r"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### 2. Create a dlt project with rest_api source and lancedb destination"
],
"metadata": {
"id": "_aqwtE-Owslg"
}
},
{
"cell_type": "markdown",
"source": [
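"A dlt project is created with the command `dlt init <source> <destination>`. For this pipeline that is `dlt init rest_api lancedb`, which was already run in the install cell of step 1 above, so there is nothing more to execute here."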
],
"metadata": {
"id": "sS6gu1f_cO7E"
}
},
{
"cell_type": "markdown",
"source": [
"### 3. Add API credentials"
],
"metadata": {
"id": "NqjfumInhWLK"
}
},
{
"cell_type": "markdown",
"source": [
"We are going to provide the credentials through environment variables. It's not advisable to paste sensitive information like API keys inside the code, so instead we store them in the secrets tab in the side panel of the notebook and read them from there into the environment.\n",
"\n",
"Since we are using the OSS version of LanceDB and OSS embedding models, we only need to specify the API key for Notion.\n",
"\n",
"**Note**: You will need to copy the [notion API key](https://share.1password.com/s#da9KgMwPaZUaey3WCaD7ICJoyHDGd3Xos2EZ29WrSWQ) into the secrets tab under the name `SOURCES__REST_API__NOTION__API_KEY`. Make sure to enable notebook access after pasting the key."
],
"metadata": {
"id": "J1nF956xoqyy"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from google.colab import userdata\n",
"\n",
"os.environ[\"SOURCES__REST_API__NOTION__API_KEY\"] = userdata.get(\"SOURCES__REST_API__NOTION__API_KEY\")\n",
"\n",
"os.environ[\"DESTINATION__LANCEDB__EMBEDDING_MODEL_PROVIDER\"] = \"sentence-transformers\"\n",
"os.environ[\"DESTINATION__LANCEDB__EMBEDDING_MODEL\"] = \"all-MiniLM-L6-v2\"\n",
"\n",
"os.environ[\"DESTINATION__LANCEDB__CREDENTIALS__URI\"] = \".lancedb\""
],
"metadata": {
"id": "vSLP6qhNqafV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### 4. Write the pipeline code"
],
"metadata": {
"id": "Eg9ySPDHtYLw"
}
},
{
"cell_type": "markdown",
"source": [
"**Note**: We first go over the code step by step before putting it into runnable cells\n",
"\n",
"1. Import necessary modules (run this cell)"
],
"metadata": {
"id": "0PuHEBIVtl-h"
}
},
{
"cell_type": "code",
"source": [
"import dlt\n",
"from rest_api import RESTAPIConfig, rest_api_source\n",
"\n",
"from dlt.sources.helpers.rest_client.paginators import BasePaginator, JSONResponsePaginator\n",
"from dlt.sources.helpers.requests import Response, Request\n",
"\n",
"from dlt.destinations.adapters import lancedb_adapter"
],
"metadata": {
"id": "BiA7UEAmtoFy"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"### 5. Run the pipeline"
],
"metadata": {
"id": "Ll95duMkuKDx"
}
},
{
"cell_type": "markdown",
"source": [
"Run this block:"
],
"metadata": {
"id": "66l6khvmpDAD"
}
},
{
"cell_type": "code",
"source": [
"from datetime import datetime, timezone\n",
"\n",
"class PostBodyPaginator(BasePaginator):\n",
" def __init__(self):\n",
" super().__init__()\n",
" self.cursor = None\n",
"\n",
" def update_state(self, response: Response) -> None:\n",
" # Assuming the API returns an empty list when no more data is available\n",
" if not response.json():\n",
" self._has_next_page = False\n",
" else:\n",
" self.cursor = response.json().get(\"next_cursor\")\n",
" if self.cursor is None:\n",
" self._has_next_page = False\n",
"\n",
" def update_request(self, request: Request) -> None:\n",
" if request.json is None:\n",
" request.json = {}\n",
"\n",
" # Add the cursor to the request body\n",
" request.json[\"start_cursor\"] = self.cursor\n",
"\n",
"@dlt.resource(name=\"employee_handbook\")\n",
"def rest_api_notion_resource():\n",
" notion_config: RESTAPIConfig = {\n",
" \"client\": {\n",
" \"base_url\": \"https://api.notion.com/v1/\",\n",
" \"auth\": {\n",
" \"token\": dlt.secrets[\"sources.rest_api.notion.api_key\"]\n",
" },\n",
" \"headers\":{\n",
" \"Content-Type\": \"application/json\",\n",
" \"Notion-Version\": \"2022-06-28\"\n",
" }\n",
" },\n",
" \"resources\": [\n",
" {\n",
" \"name\": \"search\",\n",
" \"endpoint\": {\n",
" \"path\": \"search\",\n",
" \"method\": \"POST\",\n",
" \"paginator\": PostBodyPaginator(),\n",
" \"json\": {\n",
" \"query\": \"workshop\",\n",
" \"sort\": {\n",
" \"direction\": \"ascending\",\n",
" \"timestamp\": \"last_edited_time\"\n",
" }\n",
" },\n",
" \"data_selector\": \"results\"\n",
" }\n",
" },\n",
" {\n",
" \"name\": \"page_content\",\n",
" \"endpoint\": {\n",
" \"path\": \"blocks/{page_id}/children\",\n",
" \"paginator\": JSONResponsePaginator(),\n",
" \"params\": {\n",
" \"page_id\": {\n",
" \"type\": \"resolve\",\n",
" \"resource\": \"search\",\n",
" \"field\": \"id\"\n",
" }\n",
" },\n",
" }\n",
" }\n",
" ]\n",
" }\n",
"\n",
" yield from rest_api_source(notion_config,name=\"employee_handbook\")\n",
"\n",
"def extract_page_content(response):\n",
" block_id = response[\"id\"]\n",
" last_edited_time = response[\"last_edited_time\"]\n",
" block_type = response.get(\"type\", \"Not paragraph\")\n",
" if block_type != \"paragraph\":\n",
" content = \"\"\n",
" else:\n",
" try:\n",
" content = response[\"paragraph\"][\"rich_text\"][0][\"plain_text\"]\n",
" except IndexError:\n",
" content = \"\"\n",
" return {\n",
" \"block_id\": block_id,\n",
" \"block_type\": block_type,\n",
" \"content\": content,\n",
" \"last_edited_time\": last_edited_time,\n",
" \"inserted_at_time\": datetime.now(timezone.utc)\n",
" }\n",
"\n",
"@dlt.resource(\n",
" name=\"employee_handbook\",\n",
" write_disposition=\"merge\",\n",
" primary_key=\"block_id\",\n",
" columns={\"last_edited_time\":{\"dedup_sort\":\"desc\"}}\n",
" )\n",
"def rest_api_notion_incremental(\n",
" last_edited_time = dlt.sources.incremental(\"last_edited_time\", initial_value=\"2024-06-26T08:16:00.000Z\",primary_key=(\"block_id\"))\n",
"):\n",
" # last_value = last_edited_time.last_value\n",
" # print(last_value)\n",
"\n",
" for block in rest_api_notion_resource.add_map(extract_page_content):\n",
" if not(len(block[\"content\"])):\n",
" continue\n",
" yield block\n",
"\n",
"def load_notion() -> None:\n",
" pipeline = dlt.pipeline(\n",
" pipeline_name=\"company_policies\",\n",
" destination=\"lancedb\",\n",
" dataset_name=\"notion_pages\",\n",
" # full_refresh=True\n",
" )\n",
"\n",
" load_info = pipeline.run(\n",
" lancedb_adapter(\n",
" rest_api_notion_incremental,\n",
" embed=\"content\"\n",
" ),\n",
" table_name=\"employee_handbook\",\n",
" write_disposition=\"merge\"\n",
" )\n",
" print(load_info)\n",
"\n",
"load_notion()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "3RzeKOoNUsyL",
"outputId": "88cd6d38-c4fb-4756-cb94-1b599c274d92"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"employee_handbook\n",
"[{'name': 'block_id', 'nullable': False, 'primary_key': True, 'data_type': 'text'}, {'name': 'block_type', 'data_type': 'text', 'nullable': True}, {'name': 'content', 'x-lancedb-embed': True, 'data_type': 'text', 'nullable': True}, {'dedup_sort': 'desc', 'name': 'last_edited_time', 'data_type': 'timestamp', 'nullable': True}, {'name': 'inserted_at_time', 'data_type': 'timestamp', 'nullable': True}, {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True}]\n",
"_dlt_pipeline_state\n",
"[{'name': 'version', 'data_type': 'bigint', 'nullable': False}, {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, {'name': 'pipeline_name', 'data_type': 'text', 'nullable': False}, {'name': 'state', 'data_type': 'text', 'nullable': False}, {'name': 'created_at', 'data_type': 'timestamp', 'nullable': False}, {'name': 'version_hash', 'data_type': 'text', 'nullable': True}, {'name': '_dlt_load_id', 'data_type': 'text', 'nullable': False}, {'name': '_dlt_id', 'data_type': 'text', 'nullable': False, 'unique': True}]\n",
"_dlt_version\n",
"[{'name': 'version', 'data_type': 'bigint', 'nullable': False}, {'name': 'engine_version', 'data_type': 'bigint', 'nullable': False}, {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, {'name': 'schema_name', 'data_type': 'text', 'nullable': False}, {'name': 'version_hash', 'data_type': 'text', 'nullable': False}, {'name': 'schema', 'data_type': 'text', 'nullable': False}]\n",
"_dlt_loads\n",
"[{'name': 'load_id', 'data_type': 'text', 'nullable': False}, {'name': 'schema_name', 'data_type': 'text', 'nullable': True}, {'name': 'status', 'data_type': 'bigint', 'nullable': False}, {'name': 'inserted_at', 'data_type': 'timestamp', 'nullable': False}, {'name': 'schema_version_hash', 'data_type': 'text', 'nullable': True}]\n",
"UPLOAD\n",
"Pipeline company_policies load step completed in 4.62 seconds\n",
"1 load package(s) were loaded to destination LanceDB and into dataset notion_pages\n",
"The LanceDB destination used <dlt.destinations.impl.lancedb.configuration.LanceDBCredentials object at 0x7e18f5da5a20> location to store data\n",
"Load package 1721160202.258821 is LOADED and contains no failed jobs\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### 6. Visualize the output"
],
"metadata": {
"id": "ps09cty1uN9r"
}
},
{
"cell_type": "code",
"source": [
"import lancedb\n",
"\n",
"db = lancedb.connect(\".lancedb\")\n",
"dbtable = db.open_table(\"notion_pages___employee_handbook\")\n",
"\n",
"dbtable.to_pandas()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "B7hsT5i8Y24S",
"outputId": "3b4ad984-22a5-492c-cf3d-d0ef29a855f7"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" id__ \\\n",
"0 6adeb540-d180-5d40-bc84-c40e5c173ea1 \n",
"1 cffdb1bb-a146-5e90-8fbb-a1d577a2a98e \n",
"2 25cd721d-fd64-517f-9b3b-34e3fad3522e \n",
"3 c75b7ef9-96b6-551b-9cdd-795bbe01bb6e \n",
"4 7a69c4c0-cd55-5090-903e-facf23eadde5 \n",
"5 ff1141dc-88f6-500a-a8c3-c18e37661650 \n",
"6 71e89a85-ae0b-5b68-866b-bd3922ec7548 \n",
"7 a28e913f-761f-5684-8cd5-0d0c49e0338c \n",
"8 a18932d9-1583-5c42-bd0d-0f96738c5e6c \n",
"9 93661874-13a2-5a43-bed8-868005dfd5e2 \n",
"10 b220778f-1118-5c22-b614-3bc0fd0a602b \n",
"11 d0f801ba-d3cc-5252-ad6e-3285662b609c \n",
"12 579b97f9-a5e2-53af-b4f7-efc9ad5105ad \n",
"13 a9083b7e-22cc-5b1f-8040-cb7aa1f72338 \n",
"\n",
" vector__ \\\n",
"0 [-0.038923826, 0.12081745, 0.046208546, -0.005... \n",
"1 [-0.07993289, 0.13477291, 0.0053402567, -0.029... \n",
"2 [-0.10974315, 0.10586075, 0.0032906067, -0.021... \n",
"3 [0.050755575, -0.06461986, 0.06527378, 0.01465... \n",
"4 [0.0005233448, -0.054883398, 0.043573365, -0.0... \n",
"5 [0.03802632, -0.021509668, 0.0475278, 0.064706... \n",
"6 [-0.058588073, -0.07540443, 0.033775173, 0.009... \n",
"7 [-0.004968906, -0.003911958, 0.028705632, 0.00... \n",
"8 [0.032060914, 0.02424462, 0.008471355, 0.03179... \n",
"9 [-0.013155272, 0.008382475, 0.017044408, 0.051... \n",
"10 [0.027987445, 0.06734361, 0.039806426, 0.00774... \n",
"11 [0.03252609, 0.008159482, 0.084435634, 0.05564... \n",
"12 [-0.0073140753, 0.01471069, -0.019091198, 0.02... \n",
"13 [-0.031538416, 0.034259938, -0.027282655, 0.02... \n",
"\n",
" block_id block_type \\\n",
"0 baac0ba4-9b60-450e-8cc1-1e6e2a0fb7d9 paragraph \n",
"1 0e429073-6383-4918-8961-fcc66346067f paragraph \n",
"2 f4e006d7-9b38-49e9-94cf-552beaa75773 paragraph \n",
"3 71618ca5-6c62-4b66-bc0f-3d855e0c4b8b paragraph \n",
"4 cd15aaf5-6cdc-4a13-835c-2181fd7bf81e paragraph \n",
"5 a4b2f0c9-e0c8-4b3c-81e7-ef624809977d paragraph \n",
"6 c0262981-b5f1-4a57-a91f-2e75f649b86c paragraph \n",
"7 faacf4ec-90be-4e96-b8b9-29b5112bc7ca paragraph \n",
"8 e6021a51-f403-4950-80c2-ebff005c7289 paragraph \n",
"9 b8f4cc6d-c28c-4071-9545-caadce5eb37b paragraph \n",
"10 ea7a1beb-6874-4f41-966d-dc1f80a1f635 paragraph \n",
"11 bd7a9110-fac5-4270-9493-4039ca67b467 paragraph \n",
"12 b1718dee-8c0f-4189-8c75-0e8c7844a501 paragraph \n",
"13 5bfa90c5-461d-406a-9324-a1dd54bad0d5 paragraph \n",
"\n",
" content \\\n",
"0 In this section, we describe what we offer to ... \n",
"1 Employee health is important to us. We don’t d... \n",
"2 Our company is dedicated to maintaining a safe... \n",
"3 If your job doesn’t require you to be present ... \n",
"4 Remote working refers to working from a non-of... \n",
"5 There are some expenses that we will pay direc... \n",
"6 Our company operates between 9 a.m. to 7 p.m. ... \n",
"7 Employees receive [20 days] of Paid Time Off (... \n",
"8 Our company observes the following holidays: N... \n",
"9 These holidays are considered “off-days” for m... \n",
"10 Employees who are unable to work due to illnes... \n",
"11 Losing a loved one is traumatizing. If this ha... \n",
"12 In accordance with German law, we offer a comp... \n",
"13 We recognize the vital role that fathers and p... \n",
"\n",
" last_edited_time inserted_at_time \\\n",
"0 2024-07-03 17:34:00+00:00 2024-07-16 20:03:23.581097+00:00 \n",
"1 2024-06-26 08:46:00+00:00 2024-07-16 20:03:23.593178+00:00 \n",
"2 2024-07-03 17:26:00+00:00 2024-07-16 20:03:23.593687+00:00 \n",
"3 2024-06-26 08:52:00+00:00 2024-07-16 20:03:23.594060+00:00 \n",
"4 2024-07-03 17:19:00+00:00 2024-07-16 20:03:23.594429+00:00 \n",
"5 2024-07-05 22:32:00+00:00 2024-07-16 20:03:23.594804+00:00 \n",
"6 2024-07-08 15:35:00+00:00 2024-07-16 20:03:23.840228+00:00 \n",
"7 2024-06-26 09:03:00+00:00 2024-07-16 20:03:23.840805+00:00 \n",
"8 2024-06-26 09:08:00+00:00 2024-07-16 20:03:23.841265+00:00 \n",
"9 2024-06-26 09:09:00+00:00 2024-07-16 20:03:23.841635+00:00 \n",
"10 2024-06-26 09:11:00+00:00 2024-07-16 20:03:23.842006+00:00 \n",
"11 2024-06-26 09:17:00+00:00 2024-07-16 20:03:23.842354+00:00 \n",
"12 2024-06-26 09:20:00+00:00 2024-07-16 20:03:23.842705+00:00 \n",
"13 2024-06-26 09:21:00+00:00 2024-07-16 20:03:23.843078+00:00 \n",
"\n",
" _dlt_load_id _dlt_id \n",
"0 1721160202.258821 bfKskiwHLvxAlA \n",
"1 1721160202.258821 dYt+GNt46xDVjA \n",
"2 1721160202.258821 2uLY8sOeaVGM8Q \n",
"3 1721160202.258821 QUCAmqfPyWm7+A \n",
"4 1721160202.258821 oF8J1iBWC+kIyA \n",
"5 1721160202.258821 XmW7i6IEKsiwEA \n",
"6 1721160202.258821 OgT7VGAKBMNP0Q \n",
"7 1721160202.258821 dCzKVn5hIVS6+w \n",
"8 1721160202.258821 4F8UqflruqsG6g \n",
"9 1721160202.258821 GVuKtFXlmzkVpQ \n",
"10 1721160202.258821 RLuIzEENYv38Lw \n",
"11 1721160202.258821 eUMDlKaYn3cZLQ \n",
"12 1721160202.258821 EjnfKGWEjr3PyQ \n",
"13 1721160202.258821 VdTjCTuy078Oyw "
],
"text/html": [
"\n",
" <div id=\"df-ea789017-e6d9-40aa-8c44-166a8ee745d9\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id__</th>\n",
" <th>vector__</th>\n",
" <th>block_id</th>\n",
" <th>block_type</th>\n",
" <th>content</th>\n",
" <th>last_edited_time</th>\n",
" <th>inserted_at_time</th>\n",
" <th>_dlt_load_id</th>\n",
" <th>_dlt_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6adeb540-d180-5d40-bc84-c40e5c173ea1</td>\n",
" <td>[-0.038923826, 0.12081745, 0.046208546, -0.005...</td>\n",
" <td>baac0ba4-9b60-450e-8cc1-1e6e2a0fb7d9</td>\n",
" <td>paragraph</td>\n",
" <td>In this section, we describe what we offer to ...</td>\n",
" <td>2024-07-03 17:34:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.581097+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>bfKskiwHLvxAlA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>cffdb1bb-a146-5e90-8fbb-a1d577a2a98e</td>\n",
" <td>[-0.07993289, 0.13477291, 0.0053402567, -0.029...</td>\n",
" <td>0e429073-6383-4918-8961-fcc66346067f</td>\n",
" <td>paragraph</td>\n",
" <td>Employee health is important to us. We don’t d...</td>\n",
" <td>2024-06-26 08:46:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.593178+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>dYt+GNt46xDVjA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25cd721d-fd64-517f-9b3b-34e3fad3522e</td>\n",
" <td>[-0.10974315, 0.10586075, 0.0032906067, -0.021...</td>\n",
" <td>f4e006d7-9b38-49e9-94cf-552beaa75773</td>\n",
" <td>paragraph</td>\n",
" <td>Our company is dedicated to maintaining a safe...</td>\n",
" <td>2024-07-03 17:26:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.593687+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>2uLY8sOeaVGM8Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>c75b7ef9-96b6-551b-9cdd-795bbe01bb6e</td>\n",
" <td>[0.050755575, -0.06461986, 0.06527378, 0.01465...</td>\n",
" <td>71618ca5-6c62-4b66-bc0f-3d855e0c4b8b</td>\n",
" <td>paragraph</td>\n",
" <td>If your job doesn’t require you to be present ...</td>\n",
" <td>2024-06-26 08:52:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.594060+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>QUCAmqfPyWm7+A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7a69c4c0-cd55-5090-903e-facf23eadde5</td>\n",
" <td>[0.0005233448, -0.054883398, 0.043573365, -0.0...</td>\n",
" <td>cd15aaf5-6cdc-4a13-835c-2181fd7bf81e</td>\n",
" <td>paragraph</td>\n",
" <td>Remote working refers to working from a non-of...</td>\n",
" <td>2024-07-03 17:19:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.594429+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>oF8J1iBWC+kIyA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>ff1141dc-88f6-500a-a8c3-c18e37661650</td>\n",
" <td>[0.03802632, -0.021509668, 0.0475278, 0.064706...</td>\n",
" <td>a4b2f0c9-e0c8-4b3c-81e7-ef624809977d</td>\n",
" <td>paragraph</td>\n",
" <td>There are some expenses that we will pay direc...</td>\n",
" <td>2024-07-05 22:32:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.594804+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>XmW7i6IEKsiwEA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>71e89a85-ae0b-5b68-866b-bd3922ec7548</td>\n",
" <td>[-0.058588073, -0.07540443, 0.033775173, 0.009...</td>\n",
" <td>c0262981-b5f1-4a57-a91f-2e75f649b86c</td>\n",
" <td>paragraph</td>\n",
" <td>Our company operates between 9 a.m. to 7 p.m. ...</td>\n",
" <td>2024-07-08 15:35:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.840228+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>OgT7VGAKBMNP0Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>a28e913f-761f-5684-8cd5-0d0c49e0338c</td>\n",
" <td>[-0.004968906, -0.003911958, 0.028705632, 0.00...</td>\n",
" <td>faacf4ec-90be-4e96-b8b9-29b5112bc7ca</td>\n",
" <td>paragraph</td>\n",
" <td>Employees receive [20 days] of Paid Time Off (...</td>\n",
" <td>2024-06-26 09:03:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.840805+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>dCzKVn5hIVS6+w</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>a18932d9-1583-5c42-bd0d-0f96738c5e6c</td>\n",
" <td>[0.032060914, 0.02424462, 0.008471355, 0.03179...</td>\n",
" <td>e6021a51-f403-4950-80c2-ebff005c7289</td>\n",
" <td>paragraph</td>\n",
" <td>Our company observes the following holidays: N...</td>\n",
" <td>2024-06-26 09:08:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.841265+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>4F8UqflruqsG6g</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>93661874-13a2-5a43-bed8-868005dfd5e2</td>\n",
" <td>[-0.013155272, 0.008382475, 0.017044408, 0.051...</td>\n",
" <td>b8f4cc6d-c28c-4071-9545-caadce5eb37b</td>\n",
" <td>paragraph</td>\n",
" <td>These holidays are considered “off-days” for m...</td>\n",
" <td>2024-06-26 09:09:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.841635+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>GVuKtFXlmzkVpQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>b220778f-1118-5c22-b614-3bc0fd0a602b</td>\n",
" <td>[0.027987445, 0.06734361, 0.039806426, 0.00774...</td>\n",
" <td>ea7a1beb-6874-4f41-966d-dc1f80a1f635</td>\n",
" <td>paragraph</td>\n",
" <td>Employees who are unable to work due to illnes...</td>\n",
" <td>2024-06-26 09:11:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.842006+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>RLuIzEENYv38Lw</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>d0f801ba-d3cc-5252-ad6e-3285662b609c</td>\n",
" <td>[0.03252609, 0.008159482, 0.084435634, 0.05564...</td>\n",
" <td>bd7a9110-fac5-4270-9493-4039ca67b467</td>\n",
" <td>paragraph</td>\n",
" <td>Losing a loved one is traumatizing. If this ha...</td>\n",
" <td>2024-06-26 09:17:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.842354+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>eUMDlKaYn3cZLQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>579b97f9-a5e2-53af-b4f7-efc9ad5105ad</td>\n",
" <td>[-0.0073140753, 0.01471069, -0.019091198, 0.02...</td>\n",
" <td>b1718dee-8c0f-4189-8c75-0e8c7844a501</td>\n",
" <td>paragraph</td>\n",
" <td>In accordance with German law, we offer a comp...</td>\n",
" <td>2024-06-26 09:20:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.842705+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>EjnfKGWEjr3PyQ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>a9083b7e-22cc-5b1f-8040-cb7aa1f72338</td>\n",
" <td>[-0.031538416, 0.034259938, -0.027282655, 0.02...</td>\n",
" <td>5bfa90c5-461d-406a-9324-a1dd54bad0d5</td>\n",
" <td>paragraph</td>\n",
" <td>We recognize the vital role that fathers and p...</td>\n",
" <td>2024-06-26 09:21:00+00:00</td>\n",
" <td>2024-07-16 20:03:23.843078+00:00</td>\n",
" <td>1721160202.258821</td>\n",
" <td>VdTjCTuy078Oyw</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ea789017-e6d9-40aa-8c44-166a8ee745d9')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-ea789017-e6d9-40aa-8c44-166a8ee745d9 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-ea789017-e6d9-40aa-8c44-166a8ee745d9');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-413bab6a-b2b9-45c9-bb85-b1dee4918a64\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-413bab6a-b2b9-45c9-bb85-b1dee4918a64')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-413bab6a-b2b9-45c9-bb85-b1dee4918a64 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"summary": "{\n \"name\": \"dbtable\",\n \"rows\": 14,\n \"fields\": [\n {\n \"column\": \"id__\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"93661874-13a2-5a43-bed8-868005dfd5e2\",\n \"d0f801ba-d3cc-5252-ad6e-3285662b609c\",\n \"6adeb540-d180-5d40-bc84-c40e5c173ea1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector__\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"block_id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"b8f4cc6d-c28c-4071-9545-caadce5eb37b\",\n \"bd7a9110-fac5-4270-9493-4039ca67b467\",\n \"baac0ba4-9b60-450e-8cc1-1e6e2a0fb7d9\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"block_type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"paragraph\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"content\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"These holidays are considered \\u201coff-days\\u201d for most employees. If you need a team member to work on a holiday, inform them at least three days in advance. If you are a non-exempt employee, you will receive your regular hourly rate with a premium for working on a holiday. If you are an exempt employee, we will grant you an additional day of PTO that you must take within 12 months after that holiday. 
We will count hours you worked on a holiday to decide whether you are entitled to overtime pay\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"last_edited_time\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"2024-06-26 08:46:00+00:00\",\n \"max\": \"2024-07-08 15:35:00+00:00\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"2024-06-26 09:09:00+00:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"inserted_at_time\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"2024-07-16 20:03:23.581097+00:00\",\n \"max\": \"2024-07-16 20:03:23.843078+00:00\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"2024-07-16 20:03:23.841635+00:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_dlt_load_id\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"1721160202.258821\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_dlt_id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"GVuKtFXlmzkVpQ\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "markdown",
"source": [
" ---"
],
"metadata": {
"id": "-4e3NR7a06eE"
}
},
{
"cell_type": "markdown",
"source": [
"Now we make a change to one of the paragraphs and run the pipeline again to see the effect of incremental loading. We observe two things:\n",
"1. The column `inserted_at_time` changed only for the updated row, implying that only this row was added.\n",
"2. Looking at the primary key `block_id`, we see that the original row was dropped and the updated row was inserted."
],
"metadata": {
"id": "A-3A-YWGuRCi"
}
},
{
"cell_type": "markdown",
"source": [
"## Part 2: Create a RAG bot using Ollama"
],
"metadata": {
"id": "WuG395sV1rqb"
}
},
{
"cell_type": "markdown",
"source": [
"With the contents of the employee handbook vectorized and stored in LanceDB, we're now ready to build our RAG bot with Ollama.\n"
],
"metadata": {
"id": "IDzsT3Ms2KgC"
}
},
{
"cell_type": "markdown",
"source": [
"1. Install Ollama in the notebook's local runtime, start the Ollama server in the background, and pull the `llama2-uncensored` model"
],
"metadata": {
"id": "Fqvsji-Vuilq"
}
},
{
"cell_type": "code",
"source": [
"%%capture\n",
"\n",
"!curl -fsSL https://ollama.com/install.sh | sh\n",
"!nohup ollama serve > nohup.out 2>&1 &\n",
"!ollama pull llama2-uncensored"
],
"metadata": {
"id": "kITCoZv85Ag5"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!pip install ollama"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sQhykrlGZoId",
"outputId": "2b92f85e-dc52-46c5-a945-8b05145d6b07"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting ollama\n",
" Downloading ollama-0.2.1-py3-none-any.whl (9.7 kB)\n",
"Collecting httpx<0.28.0,>=0.27.0 (from ollama)\n",
" Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx<0.28.0,>=0.27.0->ollama) (3.7.1)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<0.28.0,>=0.27.0->ollama) (2024.7.4)\n",
"Collecting httpcore==1.* (from httpx<0.28.0,>=0.27.0->ollama)\n",
" Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx<0.28.0,>=0.27.0->ollama) (3.7)\n",
"Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx<0.28.0,>=0.27.0->ollama) (1.3.1)\n",
"Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<0.28.0,>=0.27.0->ollama)\n",
" Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx<0.28.0,>=0.27.0->ollama) (1.2.1)\n",
"Installing collected packages: h11, httpcore, httpx, ollama\n",
"Successfully installed h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 ollama-0.2.1\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import ollama"
],
"metadata": {
"id": "2M4T3gF0bQQV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def retrieve_context_from_lancedb(dbtable, question, top_k=2):\n",
" query_results = dbtable.search(query=question).to_list()\n",
" context = \"\\n\".join([result[\"content\"] for result in query_results[:top_k]])\n",
"\n",
" return context"
],
"metadata": {
"id": "c1rSQm33qx2r"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#HOMEWORK\n",
"\n",
"\n",
"from datetime import datetime, timezone\n",
"\n",
"class PostBodyPaginator(BasePaginator):\n",
" def __init__(self):\n",
" super().__init__()\n",
" self.cursor = None\n",
"\n",
" def update_state(self, response: Response) -> None:\n",
" # Assuming the API returns an empty list when no more data is available\n",
" if not response.json():\n",
" self._has_next_page = False\n",
" else:\n",
" self.cursor = response.json().get(\"next_cursor\")\n",
" if self.cursor is None:\n",
" self._has_next_page = False\n",
"\n",
" def update_request(self, request: Request) -> None:\n",
" if request.json is None:\n",
" request.json = {}\n",
"\n",
" # Add the cursor to the request body\n",
" request.json[\"start_cursor\"] = self.cursor\n",
"\n",
"@dlt.resource(name=\"employee_handbook\")\n",
"def rest_api_notion_resource():\n",
" notion_config: RESTAPIConfig = {\n",
" \"client\": {\n",
" \"base_url\": \"https://api.notion.com/v1/\",\n",
" \"auth\": {\n",
" \"token\": dlt.secrets[\"sources.rest_api.notion.api_key\"]\n",
" },\n",
" \"headers\":{\n",
" \"Content-Type\": \"application/json\",\n",
" \"Notion-Version\": \"2022-06-28\"\n",
" }\n",
" },\n",
" \"resources\": [\n",
" {\n",
" \"name\": \"search\",\n",
" \"endpoint\": {\n",
" \"path\": \"search\",\n",
" \"method\": \"POST\",\n",
" \"paginator\": PostBodyPaginator(),\n",
" \"json\": {\n",
" \"query\": \"homework\",\n",
" \"sort\": {\n",
" \"direction\": \"ascending\",\n",
" \"timestamp\": \"last_edited_time\"\n",
" }\n",
" },\n",
" \"data_selector\": \"results\"\n",
" }\n",
" },\n",
" {\n",
" \"name\": \"page_content\",\n",
" \"endpoint\": {\n",
" \"path\": \"blocks/{page_id}/children\",\n",
" \"paginator\": JSONResponsePaginator(),\n",
" \"params\": {\n",
" \"page_id\": {\n",
" \"type\": \"resolve\",\n",
" \"resource\": \"search\",\n",
" \"field\": \"id\"\n",
" }\n",
" },\n",
" }\n",
" }\n",
" ]\n",
" }\n",
"\n",
" yield from rest_api_source(notion_config,name=\"employee_handbook\")\n",
"\n",
"def extract_page_content(response):\n",
" block_id = response[\"id\"]\n",
" last_edited_time = response[\"last_edited_time\"]\n",
" block_type = response.get(\"type\", \"Not paragraph\")\n",
" if block_type != \"paragraph\":\n",
" content = \"\"\n",
" else:\n",
" try:\n",
" content = response[\"paragraph\"][\"rich_text\"][0][\"plain_text\"]\n",
" except IndexError:\n",
" content = \"\"\n",
" return {\n",
" \"block_id\": block_id,\n",
" \"block_type\": block_type,\n",
" \"content\": content,\n",
" \"last_edited_time\": last_edited_time,\n",
" \"inserted_at_time\": datetime.now(timezone.utc)\n",
" }\n",
"\n",
"@dlt.resource(\n",
" name=\"homework\",\n",
" write_disposition=\"merge\",\n",
" primary_key=\"block_id\",\n",
" columns={\"last_edited_time\":{\"dedup_sort\":\"desc\"}}\n",
" )\n",
"def rest_api_notion_incremental(\n",
" last_edited_time = dlt.sources.incremental(\"last_edited_time\", initial_value=\"2024-06-26T08:16:00.000Z\",primary_key=(\"block_id\"))\n",
"):\n",
" # last_value = last_edited_time.last_value\n",
" # print(last_value)\n",
"\n",
" for block in rest_api_notion_resource.add_map(extract_page_content):\n",
" if not(len(block[\"content\"])):\n",
" continue\n",
" yield block\n",
"\n",
"def load_notion() -> None:\n",
" pipeline = dlt.pipeline(\n",
" pipeline_name=\"company_policies_hw\",\n",
" destination=\"lancedb\",\n",
" dataset_name=\"notion_pages\",\n",
" # full_refresh=True\n",
" )\n",
"\n",
" load_info = pipeline.run(\n",
" lancedb_adapter(\n",
" rest_api_notion_incremental,\n",
" embed=\"content\"\n",
" ),\n",
" table_name=\"homework\",\n",
" write_disposition=\"merge\"\n",
" )\n",
" print(load_info)\n"
],
"metadata": {
"id": "WRYeraXt30k-"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"load_notion()"
],
"metadata": {
"id": "uRR73XqvH9_D"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"TABLE=\"notion_pages___homework\"\n",
"db = lancedb.connect(\".lancedb\")\n",
"db.table_names()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "E7hn0KtL4GdF",
"outputId": "fb4edb54-2584-4844-dcc2-e83917edf70f"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['notion_pages____dlt_loads',\n",
" 'notion_pages____dlt_pipeline_state',\n",
" 'notion_pages____dlt_version',\n",
" 'notion_pages___dltSentinelTable',\n",
" 'notion_pages___employee_handbook',\n",
" 'notion_pages___homework',\n",
" 'qanda____dlt_loads',\n",
" 'qanda____dlt_pipeline_state',\n",
" 'qanda____dlt_version',\n",
" 'qanda___dltSentinelTable',\n",
" 'qanda___documents',\n",
" 'qanda_embedded____dlt_loads',\n",
" 'qanda_embedded____dlt_pipeline_state',\n",
" 'qanda_embedded____dlt_version',\n",
" 'qanda_embedded___dltSentinelTable',\n",
" 'qanda_embedded___documents']"
]
},
"metadata": {},
"execution_count": 21
}
]
},
{
"cell_type": "code",
"source": [
"dbtable = db.open_table(TABLE)\n",
"df = dbtable.to_pandas()\n",
"df.shape\n",
"print(f'Q1) How many rows does the lancedb table \"notion_pages__homework\" have? ', df.shape[0], dbtable.count_rows())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "tEH5hqlR-I5o",
"outputId": "2fd496f1-0bbc-4620-86bb-562012b78372"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Q1) How many rows does the lancedb table \"notion_pages__homework\" have? 17 17\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(\n",
" f\"Q2) What value does it store after you've run your pipeline once? \",\n",
" df.last_edited_time.max(),\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BTUSdmMQ4TX3",
"outputId": "85e2427e-c7f8-4f3f-a795-b9d97cd32150"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Q2) What value does it store after you've run your pipeline once? 2024-07-05 23:33:00+00:00\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"def rag(question, table=\"notion_pages___employee_handbook\", ):\n",
" # Connect to the lancedb table\n",
" db = lancedb.connect(\".lancedb\")\n",
" dbtable = db.open_table(table)\n",
"\n",
" # A system prompt telling ollama to accept input in the form of \"Question: ... ; Context: ...\"\n",
" messages = [\n",
" {\"role\": \"system\", \"content\": \"You are a helpful assistant that helps users understand policies inside a company's employee handbook. The user will first ask you a question and then provide you relevant paragraphs from the handbook as context. Please answer the question based on the provided context. For any details missing in the paragraph, encourage the employee to contact the HR for that information. Please keep the responses conversational.\"}\n",
" ]\n",
"\n",
" # Retrieve the relevant paragraphs on the question\n",
" context = retrieve_context_from_lancedb(dbtable,question,top_k=2)\n",
"\n",
" # Create a user prompt using the question and retrieved context\n",
" messages.append(\n",
" {\"role\": \"user\", \"content\": f\"Question: '{question}'; Context:'{context}'\"}\n",
" )\n",
"\n",
" # Get the response from the LLM\n",
" response = ollama.chat(\n",
" model=\"llama2-uncensored\",\n",
" messages=messages\n",
" )\n",
" response_content = response['message']['content']\n",
" print(f\"Assistant: {response_content}\")\n"
],
"metadata": {
"id": "9QB9a7gD8Z7W"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Q3\n",
"rag(\"how many PTO days are the employees entitled to in a year?\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4e504d7f-dc82-4ea2-d0fb-2c743893a15e",
"id": "uEQClzZ_LqH_"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Assistant: Thank you for the question and context. Based on the information provided, it appears that employees in this company receive 20 days of Paid Time Off (PTO) per year. This means that the employee has accrued [40 days] by now. The employee can use PTO at any time after their first week with the company and earn one additional day per year up to a maximum of 25 days overall.\n",
"If the employee wants to take PTO, they should send a request through HRIS. Their manager or HR must approve before taking leave. There is no requirement for providing a reason for requesting PTO. If the company closes due to COVID-19, it may compensate accrued PTO with their final paycheck according to local law.\n",
"These holidays are considered “off-days” for most employees and if they need a team member to work on a holiday, they should inform them at least three days in advance. If the employee is non-exempt, they will receive their regular hourly rate with a premium for working on a holiday.\n",
"If the employee is exempt, they will be granted an additional day of PTO that they must take within 12 months after that holiday. The hours worked during the holiday will count towards deciding whether or not they are entitled to overtime pay.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# rag(\"how many PTO days are the employees entitled to in a year?\", TABLE)\n"
],
"metadata": {
"id": "6QgXsPHjB3Id"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment