Last active
January 18, 2024 09:58
-
-
Save ZanSara/0af1c2ac6c71d0a723c179cc6ec1ac41 to your computer and use it in GitHub Desktop.
Haystack 2.0 - RAG Pipelines from scratch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"gpuType": "T4" | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"d4dfda42872f4f77abbf66b97833c362": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_2c8ef68c44664ccda137d1cd91eb6f7f", | |
"IPY_MODEL_2cda6a3527b849db847184b88d1fd5b8", | |
"IPY_MODEL_b8f7d82157a7418f8eded2b65dbf401d" | |
], | |
"layout": "IPY_MODEL_f687f485cfd44886bc95c4b7c28fb3f1" | |
} | |
}, | |
"2c8ef68c44664ccda137d1cd91eb6f7f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_1e354853c0a644079a831b45b7d66c99", | |
"placeholder": "", | |
"style": "IPY_MODEL_d4d449df1400492db4cd3f6428113990", | |
"value": "Ranking by BM25...: 100%" | |
} | |
}, | |
"2cda6a3527b849db847184b88d1fd5b8": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_30bc8c55858b4d009f436c4843418c02", | |
"max": 4, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_63341eb3676d42459fccd81842001637", | |
"value": 4 | |
} | |
}, | |
"b8f7d82157a7418f8eded2b65dbf401d": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_33c14dd20b364bd09281bd925c450ca8", | |
"placeholder": "", | |
"style": "IPY_MODEL_ff14949a23044e2081971907abf73dbb", | |
"value": " 4/4 [00:00<00:00, 159.78 docs/s]" | |
} | |
}, | |
"f687f485cfd44886bc95c4b7c28fb3f1": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"1e354853c0a644079a831b45b7d66c99": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"d4d449df1400492db4cd3f6428113990": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"30bc8c55858b4d009f436c4843418c02": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"63341eb3676d42459fccd81842001637": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"33c14dd20b364bd09281bd925c450ca8": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"ff14949a23044e2081971907abf73dbb": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"b7e95e57b21b4e5784b1e697984844ba": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_3b9f505818b74f09a778089354014982", | |
"IPY_MODEL_e96dcde2b0604a869f5dc34df231f524", | |
"IPY_MODEL_5af5301d74524d46a768ab554df74c84" | |
], | |
"layout": "IPY_MODEL_26486e60d6564fbd9fe7150cfc746af1" | |
} | |
}, | |
"3b9f505818b74f09a778089354014982": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_ef15806a667b402d96b45e1199be8130", | |
"placeholder": "", | |
"style": "IPY_MODEL_f16d6ef7eeaa465fae0d79fc9e0f7027", | |
"value": "Ranking by BM25...: 100%" | |
} | |
}, | |
"e96dcde2b0604a869f5dc34df231f524": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_a132706aafec4f30954221e561bfca80", | |
"max": 4, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_f255fd94b7164c52b782dac65f7e62ec", | |
"value": 4 | |
} | |
}, | |
"5af5301d74524d46a768ab554df74c84": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_1c7fc1ce7a414359a90f49cb60c25193", | |
"placeholder": "", | |
"style": "IPY_MODEL_497b467a21804300a3c0a92897ca43c1", | |
"value": " 4/4 [00:00<00:00, 131.54 docs/s]" | |
} | |
}, | |
"26486e60d6564fbd9fe7150cfc746af1": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"ef15806a667b402d96b45e1199be8130": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"f16d6ef7eeaa465fae0d79fc9e0f7027": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"a132706aafec4f30954221e561bfca80": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"f255fd94b7164c52b782dac65f7e62ec": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"1c7fc1ce7a414359a90f49cb60c25193": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"497b467a21804300a3c0a92897ca43c1": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"be678df9ec144b288cb9ac1b9e86b1fb": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_61e35495d0834901a91ebe21810e4d2c", | |
"IPY_MODEL_b289679b43fa48bdb7c7fe5329967675", | |
"IPY_MODEL_d41b91a377544abc836629e7a46a8cec" | |
], | |
"layout": "IPY_MODEL_81da0bfb51aa4536abd6da0a9b68e6e6" | |
} | |
}, | |
"61e35495d0834901a91ebe21810e4d2c": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_3433f3c36cc1423a8ef99fcc7b5eec1d", | |
"placeholder": "", | |
"style": "IPY_MODEL_f114e2182dcd4096a84c56bbe8eab1b0", | |
"value": "Ranking by BM25...: 100%" | |
} | |
}, | |
"b289679b43fa48bdb7c7fe5329967675": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_b2c1b1787aff478e9b647593bda28724", | |
"max": 4, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_f7cf9933c6af4fa08172bd116d437125", | |
"value": 4 | |
} | |
}, | |
"d41b91a377544abc836629e7a46a8cec": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_bf5dcae1b4fc42ecab2017129429d028", | |
"placeholder": "", | |
"style": "IPY_MODEL_d1b52e9f589b4a27ad798461df285fbe", | |
"value": " 4/4 [00:00<00:00, 213.09 docs/s]" | |
} | |
}, | |
"81da0bfb51aa4536abd6da0a9b68e6e6": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"3433f3c36cc1423a8ef99fcc7b5eec1d": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"f114e2182dcd4096a84c56bbe8eab1b0": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"b2c1b1787aff478e9b647593bda28724": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"f7cf9933c6af4fa08172bd116d437125": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"bf5dcae1b4fc42ecab2017129429d028": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"d1b52e9f589b4a27ad798461df285fbe": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
} | |
} | |
}, | |
"accelerator": "GPU" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"*From https://www.zansara.dev/posts/2023-10-27-haystack-series-rag/*" | |
], | |
"metadata": { | |
"id": "EOw2UFz2_9xM" | |
} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Install the libraries" | |
], | |
"metadata": { | |
"id": "n1DbBwkqX-ax" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "gixkExYMh9cA", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "22e5c020-353a-4fb2-f5d4-0ccb567a907e" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (1.0.9)\n", | |
"Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from langdetect) (1.16.0)\n", | |
"Requirement already satisfied: boilerpy3 in /usr/local/lib/python3.10/dist-packages (1.0.7)\n", | |
"Requirement already satisfied: transformers[sentencepiece,torch]==4.34.1 in /usr/local/lib/python3.10/dist-packages (4.34.1)\n", | |
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (3.13.1)\n", | |
"Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.17.3)\n", | |
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (1.23.5)\n", | |
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (23.2)\n", | |
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (6.0.1)\n", | |
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (2023.6.3)\n", | |
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (2.31.0)\n", | |
"Requirement already satisfied: tokenizers<0.15,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.14.1)\n", | |
"Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.4.1)\n", | |
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (4.66.1)\n", | |
"Requirement already satisfied: torch!=1.12.0,>=1.10 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (2.1.0+cu121)\n", | |
"Requirement already satisfied: accelerate>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.26.1)\n", | |
"Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.1.99)\n", | |
"Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (3.20.3)\n", | |
"Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.3->transformers[sentencepiece,torch]==4.34.1) (5.9.5)\n", | |
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers[sentencepiece,torch]==4.34.1) (2023.6.0)\n", | |
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers[sentencepiece,torch]==4.34.1) (4.9.0)\n", | |
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (1.12)\n", | |
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (3.2.1)\n", | |
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (3.1.3)\n", | |
"Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (2.1.0)\n", | |
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece,torch]==4.34.1) (3.3.2)\n", | |
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece,torch]==4.34.1) (3.6)\n", | |
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece,torch]==4.34.1) (2.0.7)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece,torch]==4.34.1) (2023.11.17)\n", | |
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (2.1.3)\n", | |
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (1.3.0)\n", | |
"Requirement already satisfied: haystack-ai==2.0.0b5 in /usr/local/lib/python3.10/dist-packages (2.0.0b5)\n", | |
"Requirement already satisfied: boilerpy3 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (1.0.7)\n", | |
"Requirement already satisfied: haystack-bm25 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (1.0.2)\n", | |
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (3.1.3)\n", | |
"Requirement already satisfied: lazy-imports in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (0.3.1)\n", | |
"Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (10.1.0)\n", | |
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (3.2.1)\n", | |
"Requirement already satisfied: openai>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (1.8.0)\n", | |
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (1.5.3)\n", | |
"Requirement already satisfied: posthog in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (3.3.1)\n", | |
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (6.0.1)\n", | |
"Requirement already satisfied: tenacity in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (8.2.3)\n", | |
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (4.66.1)\n", | |
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (4.9.0)\n", | |
"Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (3.7.1)\n", | |
"Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (1.7.0)\n", | |
"Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (0.26.0)\n", | |
"Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (1.10.13)\n", | |
"Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (1.3.0)\n", | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from haystack-bm25->haystack-ai==2.0.0b5) (1.23.5)\n", | |
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->haystack-ai==2.0.0b5) (2.1.3)\n", | |
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->haystack-ai==2.0.0b5) (2.8.2)\n", | |
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->haystack-ai==2.0.0b5) (2023.3.post1)\n", | |
"Requirement already satisfied: requests<3.0,>=2.7 in /usr/local/lib/python3.10/dist-packages (from posthog->haystack-ai==2.0.0b5) (2.31.0)\n", | |
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog->haystack-ai==2.0.0b5) (1.16.0)\n", | |
"Requirement already satisfied: monotonic>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog->haystack-ai==2.0.0b5) (1.6)\n", | |
"Requirement already satisfied: backoff>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from posthog->haystack-ai==2.0.0b5) (2.2.1)\n", | |
"Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai>=1.1.0->haystack-ai==2.0.0b5) (3.6)\n", | |
"Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai>=1.1.0->haystack-ai==2.0.0b5) (1.2.0)\n", | |
"Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai==2.0.0b5) (2023.11.17)\n", | |
"Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai==2.0.0b5) (1.0.2)\n", | |
"Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai==2.0.0b5) (0.14.0)\n", | |
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.7->posthog->haystack-ai==2.0.0b5) (3.3.2)\n", | |
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.7->posthog->haystack-ai==2.0.0b5) (2.0.7)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Install haystack & some deps\n", | |
"%pip install langdetect\n", | |
"%pip install boilerpy3\n", | |
"%pip install transformers[torch,sentencepiece]==4.34.1\n", | |
"%pip install haystack-ai==2.0.0b5" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Get OpenAI API key\n", | |
"\n", | |
"import getpass\n", | |
"\n", | |
"api_key = getpass.getpass()" | |
], | |
"metadata": { | |
"id": "wzdNlKb2To-z", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "bdeed1bc-4471-4d0d-8d88-974246ff22ed" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"··········\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Generators" | |
], | |
"metadata": { | |
"id": "Ubo3JEG-UnNf" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from haystack.components.generators import OpenAIGenerator\n", | |
"\n", | |
"generator = OpenAIGenerator(api_key=api_key)\n", | |
"\n", | |
"generator.run(prompt=\"What's the official language of France?\")" | |
], | |
"metadata": { | |
"id": "N-4JJYYai-i9", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "9693867c-c7e3-42b7-927f-f4aae49798d4" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'replies': ['The official language of France is French.'],\n", | |
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n", | |
" 'index': 0,\n", | |
" 'finish_reason': 'stop',\n", | |
" 'usage': {'completion_tokens': 8,\n", | |
" 'prompt_tokens': 15,\n", | |
" 'total_tokens': 23}}]}" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 48 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# PromptBuilder" | |
], | |
"metadata": { | |
"id": "kF6LrxvHVp9b" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from haystack.components.builders import PromptBuilder\n", | |
"\n", | |
"prompt_builder = PromptBuilder(template=\"What's the official language of {{ country }}?\")\n", | |
"\n", | |
"prompt_builder.run(country=\"France\")" | |
], | |
"metadata": { | |
"id": "2u_50rAUjpod", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "35e81bf1-9c7c-4cd3-cfe6-de31d77c46ac" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'prompt': \"What's the official language of France?\"}" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 49 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# A Simple Generative Pipeline" | |
], | |
"metadata": { | |
"id": "zfsbF9aqVtfX" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from haystack import Pipeline\n", | |
"from haystack.components.generators import OpenAIGenerator\n", | |
"from haystack.components.builders.prompt_builder import PromptBuilder\n", | |
"\n", | |
"pipe = Pipeline()\n", | |
"pipe.add_component(\"prompt_builder\", PromptBuilder(template=\"What's the official language of {{ country }}?\"))\n", | |
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n", | |
"pipe.connect(\"prompt_builder\", \"llm\")\n", | |
"\n", | |
"pipe.run({\"prompt_builder\": {\"country\": \"France\"}})" | |
], | |
"metadata": { | |
"id": "14qTTokskoWJ", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "c85f60e7-60bc-407f-da90-c54929f35721" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'llm': {'replies': ['The official language of France is French.'],\n", | |
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n", | |
" 'index': 0,\n", | |
" 'finish_reason': 'stop',\n", | |
" 'usage': {'completion_tokens': 8,\n", | |
" 'prompt_tokens': 15,\n", | |
" 'total_tokens': 23}}]}}" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 50 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"pipe.draw(\"simple-llm-pipeline.png\")" | |
], | |
"metadata": { | |
"id": "NWDO5EUgVGhN" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Make the LLM cheat" | |
], | |
"metadata": { | |
"id": "5ELhDczXWYPq" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"pipe.run({\"prompt_builder\": {\"country\": \"the Republic of Rose Island\"}})" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "oIEB_HfHWIFC", | |
"outputId": "772fe059-ec5f-4d25-ddad-973b1266510c" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'llm': {'replies': ['The official language of the Republic of Rose Island is Italian.'],\n", | |
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n", | |
" 'index': 0,\n", | |
" 'finish_reason': 'stop',\n", | |
" 'usage': {'completion_tokens': 12,\n", | |
" 'prompt_tokens': 19,\n", | |
" 'total_tokens': 31}}]}}" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 52 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"context_template = \"\"\"\n", | |
"Given the following information, answer the question.\n", | |
"Context: {{ context }}\n", | |
"Question: {{ question }}\n", | |
"\"\"\"\n", | |
"language_template = \"What's the official language of {{ country }}?\"\n", | |
"\n", | |
"pipe = Pipeline()\n", | |
"pipe.add_component(\"context_prompt\", PromptBuilder(template=context_template))\n", | |
"pipe.add_component(\"language_prompt\", PromptBuilder(template=language_template))\n", | |
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n", | |
"pipe.connect(\"language_prompt\", \"context_prompt.question\")\n", | |
"pipe.connect(\"context_prompt\", \"llm\")\n", | |
"\n", | |
"pipe.run({\n", | |
" \"context_prompt\": {\"context\": \"Rose Island had its own government, currency, post office, and commercial establishments, and the official language was Esperanto.\"},\n", | |
" \"language_prompt\": {\"country\": \"the Republic of Rose Island\"}\n", | |
"})" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "6UaQ8n03s2rO", | |
"outputId": "7769e4d6-32e1-419b-ce49-523c3a778b6c" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'llm': {'replies': ['The official language of the Republic of Rose Island is Esperanto.'],\n", | |
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n", | |
" 'index': 0,\n", | |
" 'finish_reason': 'stop',\n", | |
" 'usage': {'completion_tokens': 13,\n", | |
" 'prompt_tokens': 57,\n", | |
" 'total_tokens': 70}}]}}" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 53 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"pipe.draw(\"double-prompt-builder-pipeline.png\")" | |
], | |
"metadata": { | |
"id": "ue-_-skjWh-Q" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"template = \"\"\"\n", | |
"Given the following information, answer the question.\n", | |
"Context: {{ context }}\n", | |
"Question: What's the official language of {{ country }}?\n", | |
"\"\"\"\n", | |
"pipe = Pipeline()\n", | |
"pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n", | |
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n", | |
"pipe.connect(\"prompt_builder\", \"llm\")\n", | |
"\n", | |
"pipe.run({\n", | |
" \"prompt_builder\": {\n", | |
" \"context\": \"Rose Island had its own government, currency, post office, and commercial establishments, and the official language was Esperanto.\",\n", | |
" \"country\": \"the Republic of Rose Island\"\n", | |
" }\n", | |
"})" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Gc2w5MD1WzzB", | |
"outputId": "f02b2196-90c6-4dde-f3aa-b106d39cd94c" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'llm': {'replies': ['The official language of the Republic of Rose Island is Esperanto.'],\n", | |
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n", | |
" 'index': 0,\n", | |
" 'finish_reason': 'stop',\n", | |
" 'usage': {'completion_tokens': 13,\n", | |
" 'prompt_tokens': 57,\n", | |
" 'total_tokens': 70}}]}}" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 55 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"pipe.draw(\"advanced-prompt-builder-pipeline.png\")" | |
], | |
"metadata": { | |
"id": "-simX98cW8SL" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Retrieving the context" | |
], | |
"metadata": { | |
"id": "3V5sCS1bUqzS" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from haystack.dataclasses import Document\n", | |
"from haystack.document_stores.in_memory import InMemoryDocumentStore\n", | |
"\n", | |
"documents = [\n", | |
" Document(content=\"German is the the official language of Germany.\"),\n", | |
" Document(content=\"The capital of France is Paris, and its official language is French.\"),\n", | |
" Document(content=\"Italy recognizes a few official languages, but the most widespread one is Italian.\"),\n", | |
" Document(content=\"Esperanto has been adopted as official language for some microstates as well, such as the Republic of Rose Island, a short-lived microstate built on a sea platform in the Adriatic Sea.\")\n", | |
"]\n", | |
"docstore = InMemoryDocumentStore()\n", | |
"docstore.write_documents(documents=documents)\n", | |
"\n", | |
"docstore.filter_documents()" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "j0s5v7VCXhFo", | |
"outputId": "c48294bf-13a1-4f1d-8321-1996ede51f21" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[Document(id=a9e71bb14b0adc6a4e7aaf715129fdae95ff01fa764d43a3eccb61794eb5eb5b, content: 'German is the the official language of Germany.'),\n", | |
" Document(id=72213a7ad56744ce248bf8608fad642859e5ec4b25519321f9eaa33fe1205fa5, content: 'The capital of France is Paris, and its official language is French.'),\n", | |
" Document(id=8101f840db7cf3a7567b07ca8cc52ac36be8881fb8ac400c4a6c16653dfa1a47, content: 'Italy recognizes a few official languages, but the most widespread one is Italian.'),\n", | |
" Document(id=eb573d87abd5cbd94f620c6cbce316af21a8f35e594d02a21b387d857fc4441b, content: 'Esperanto has been adopted as official language for some microstates as well, such as the Republic o...')]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 57 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n", | |
"\n", | |
"retriever = InMemoryBM25Retriever(document_store=docstore)" | |
], | |
"metadata": { | |
"id": "hxpU-H_gXiNt" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"retriever.run(query=\"Rose Island\", top_k=1)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 84, | |
"referenced_widgets": [ | |
"d4dfda42872f4f77abbf66b97833c362", | |
"2c8ef68c44664ccda137d1cd91eb6f7f", | |
"2cda6a3527b849db847184b88d1fd5b8", | |
"b8f7d82157a7418f8eded2b65dbf401d", | |
"f687f485cfd44886bc95c4b7c28fb3f1", | |
"1e354853c0a644079a831b45b7d66c99", | |
"d4d449df1400492db4cd3f6428113990", | |
"30bc8c55858b4d009f436c4843418c02", | |
"63341eb3676d42459fccd81842001637", | |
"33c14dd20b364bd09281bd925c450ca8", | |
"ff14949a23044e2081971907abf73dbb" | |
] | |
}, | |
"id": "J6va4mCuXiIU", | |
"outputId": "05d3abc9-0e67-4f48-ff58-e4a1025accc6" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"Ranking by BM25...: 0%| | 0/4 [00:00<?, ? docs/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "d4dfda42872f4f77abbf66b97833c362" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'documents': [Document(id=eb573d87abd5cbd94f620c6cbce316af21a8f35e594d02a21b387d857fc4441b, content: 'Esperanto has been adopted as official language for some microstates as well, such as the Republic o...', score: 1.1925645254031016)]}" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 59 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"retriever.run(query=\"Rose Island\", top_k=3)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 84, | |
"referenced_widgets": [ | |
"b7e95e57b21b4e5784b1e697984844ba", | |
"3b9f505818b74f09a778089354014982", | |
"e96dcde2b0604a869f5dc34df231f524", | |
"5af5301d74524d46a768ab554df74c84", | |
"26486e60d6564fbd9fe7150cfc746af1", | |
"ef15806a667b402d96b45e1199be8130", | |
"f16d6ef7eeaa465fae0d79fc9e0f7027", | |
"a132706aafec4f30954221e561bfca80", | |
"f255fd94b7164c52b782dac65f7e62ec", | |
"1c7fc1ce7a414359a90f49cb60c25193", | |
"497b467a21804300a3c0a92897ca43c1" | |
] | |
}, | |
"id": "iWpwPT4JXiCY", | |
"outputId": "ce74681d-3a58-484a-fb93-1c9eef108f1c" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"Ranking by BM25...: 0%| | 0/4 [00:00<?, ? docs/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "b7e95e57b21b4e5784b1e697984844ba" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'documents': [Document(id=eb573d87abd5cbd94f620c6cbce316af21a8f35e594d02a21b387d857fc4441b, content: 'Esperanto has been adopted as official language for some microstates as well, such as the Republic o...', score: 1.1925645254031016)]}" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 60 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Our first RAG Pipeline" | |
], | |
"metadata": { | |
"id": "0aRCL7YyX70Q" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"template = \"\"\"\n", | |
"Given the following information, answer the question.\n", | |
"\n", | |
"Context:\n", | |
"{% for document in documents %}\n", | |
" {{ document.content }}\n", | |
"{% endfor %}\n", | |
"\n", | |
"Question: What's the official language of {{ country }}?\n", | |
"\"\"\"\n", | |
"pipe = Pipeline()\n", | |
"\n", | |
"pipe.add_component(\"retriever\", InMemoryBM25Retriever(document_store=docstore))\n", | |
"pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n", | |
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n", | |
"pipe.connect(\"retriever\", \"prompt_builder.documents\")\n", | |
"pipe.connect(\"prompt_builder\", \"llm\")\n", | |
"\n", | |
"country = \"the Republic of Rose Island\"\n", | |
"pipe.run({\n", | |
" \"retriever\": {\"query\": country},\n", | |
" \"prompt_builder\": {\"country\": country}\n", | |
"})" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 171, | |
"referenced_widgets": [ | |
"be678df9ec144b288cb9ac1b9e86b1fb", | |
"61e35495d0834901a91ebe21810e4d2c", | |
"b289679b43fa48bdb7c7fe5329967675", | |
"d41b91a377544abc836629e7a46a8cec", | |
"81da0bfb51aa4536abd6da0a9b68e6e6", | |
"3433f3c36cc1423a8ef99fcc7b5eec1d", | |
"f114e2182dcd4096a84c56bbe8eab1b0", | |
"b2c1b1787aff478e9b647593bda28724", | |
"f7cf9933c6af4fa08172bd116d437125", | |
"bf5dcae1b4fc42ecab2017129429d028", | |
"d1b52e9f589b4a27ad798461df285fbe" | |
] | |
}, | |
"id": "jFmhoTfNXh7e", | |
"outputId": "05245674-90c7-45f5-d6b8-5b566352711f" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"Ranking by BM25...: 0%| | 0/4 [00:00<?, ? docs/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "be678df9ec144b288cb9ac1b9e86b1fb" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'llm': {'replies': ['The official language of the Republic of Rose Island is Esperanto.'],\n", | |
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n", | |
" 'index': 0,\n", | |
" 'finish_reason': 'stop',\n", | |
" 'usage': {'completion_tokens': 13,\n", | |
" 'prompt_tokens': 114,\n", | |
" 'total_tokens': 127}}]}}" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 61 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"pipe.draw(\"simple-rag-pipeline.png\")" | |
], | |
"metadata": { | |
"id": "XkS0ozuZXhfV" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Going to production" | |
], | |
"metadata": { | |
"id": "iGg33w2pmHr6" | |
} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Setting up Elasticsearch 8 (MANUAL STEPS REQUIRED)" | |
], | |
"metadata": { | |
"id": "nHF3oUcUznyn" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%%bash\n", | |
"\n", | |
"rm -rf elasticsearch*\n", | |
"wget -q https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.8.0-linux-x86_64.tar.gz\n", | |
"tar -xzf elasticsearch-8.8.0-linux-x86_64.tar.gz\n", | |
"sudo chown -R daemon:daemon elasticsearch-8.8.0/\n", | |
"umount /sys/fs/cgroup\n", | |
"apt install cgroup-tools" | |
], | |
"metadata": { | |
"id": "S6EuX-CDnBZh" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%%bash --bg\n", | |
"\n", | |
"sudo -H -u daemon elasticsearch-8.8.0/bin/elasticsearch" | |
], | |
"metadata": { | |
"id": "zefnKPrawP0S" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# This part is important, since it takes some time for the Elasticsearch instance to start up\n", | |
"import os\n", | |
"import time\n", | |
"time.sleep(60)" | |
], | |
"metadata": { | |
"id": "xaAAUNrywRgx" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"!ps -ef | grep elastic" | |
], | |
"metadata": { | |
"id": "nOOr_HbFwSoa" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Type 'y' when requested\n", | |
"\n", | |
"!/content/elasticsearch-8.8.0/bin/elasticsearch-setup-passwords auto -url \"https://localhost:9200\"" | |
], | |
"metadata": { | |
"id": "1JmlziR8wWYD" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Copy the password printed at the bottom of the output above and set it as the env var below, replacing the example value. It looks like this:\n", | |
"#\n", | |
"# Changed password for user elastic\n", | |
"# PASSWORD elastic = zkjH4RYRZbUjJk9xUYEV\n", | |
"#\n", | |
"# You need this password in the cell below as well.\n", | |
"\n", | |
"os.environ[\"ELASTICSEARCH_PASSWORD\"] = \"rsAhNQx4pI64TdjbFYL6\"" | |
], | |
"metadata": { | |
"id": "QzLZGbKhwjWk" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Test the password: paste it at the prompt when requested and check that the request succeeds\n", | |
"!curl --cacert /content/elasticsearch-8.8.0/config/certs/http_ca.crt -u elastic -H 'Content-Type: application/json' -XGET https://localhost:9200/?pretty=true" | |
], | |
"metadata": { | |
"id": "fb_byjShwfR1" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Install the Haystack Elasticsearch integration" | |
], | |
"metadata": { | |
"id": "MgGM2qKXzvUA" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%pip install elasticsearch-haystack==0.1.2" | |
], | |
"metadata": { | |
"id": "xNm-mSN8mPFd" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Build the pipeline" | |
], | |
"metadata": { | |
"id": "hTTmopFX-jse" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from elasticsearch_haystack.document_store import ElasticsearchDocumentStore\n", | |
"\n", | |
"# Get the host where Elasticsearch is running, default to localhost\n", | |
"host = os.environ.get(\"ELASTICSEARCH_HOST\", \"https://localhost:9200\")\n", | |
"user = \"elastic\"\n", | |
"pwd = os.environ[\"ELASTICSEARCH_PASSWORD\"] # If this fails, make sure you ran the setup cell above that sets ELASTICSEARCH_PASSWORD.\n", | |
"\n", | |
"docstore = ElasticsearchDocumentStore(hosts=[host], basic_auth=(user, pwd), ca_certs=\"/content/elasticsearch-8.8.0/config/certs/http_ca.crt\")" | |
], | |
"metadata": { | |
"id": "QRVh5wAHnxJz" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from haystack.document_stores import DuplicatePolicy\n", | |
"documents = [\n", | |
" Document(content=\"German is the the official language of Germany.\"),\n", | |
" Document(content=\"The capital of France is Paris, and its official language is French.\"),\n", | |
" Document(content=\"Italy recognizes a few official languages, but the most widespread one is Italian.\"),\n", | |
" Document(content=\"Esperanto has been adopted as official language for some microstates as well, such as the Republic of Rose Island, a short-lived microstate built on a sea platform in the Adriatic Sea.\")\n", | |
"]\n", | |
"docstore.write_documents(documents=documents, policy=DuplicatePolicy.OVERWRITE)\n", | |
"\n", | |
"docstore.filter_documents()" | |
], | |
"metadata": { | |
"id": "lAlQ0wiL1RfN" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from elasticsearch_haystack.bm25_retriever import ElasticsearchBM25Retriever\n", | |
"\n", | |
"template = \"\"\"\n", | |
"Given the following information, answer the question.\n", | |
"\n", | |
"Context:\n", | |
"{% for document in documents %}\n", | |
" {{ document.content }}\n", | |
"{% endfor %}\n", | |
"\n", | |
"Question: What's the official language of {{ country }}?\n", | |
"\"\"\"\n", | |
"\n", | |
"pipe = Pipeline()\n", | |
"pipe.add_component(\"retriever\", ElasticsearchBM25Retriever(document_store=docstore))\n", | |
"pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n", | |
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n", | |
"pipe.connect(\"retriever\", \"prompt_builder.documents\")\n", | |
"pipe.connect(\"prompt_builder\", \"llm\")\n", | |
"\n", | |
"pipe.draw(\"elasticsearch-rag-pipeline.png\")\n", | |
"\n", | |
"country = \"the Republic of Rose Island\"\n", | |
"pipe.run({\n", | |
" \"retriever\": {\"query\": country},\n", | |
" \"prompt_builder\": {\"country\": country}\n", | |
"})" | |
], | |
"metadata": { | |
"id": "rZhA_rK51EoJ" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment