Skip to content

Instantly share code, notes, and snippets.

@ZanSara
Last active January 18, 2024 09:58
Show Gist options
  • Save ZanSara/0af1c2ac6c71d0a723c179cc6ec1ac41 to your computer and use it in GitHub Desktop.
Save ZanSara/0af1c2ac6c71d0a723c179cc6ec1ac41 to your computer and use it in GitHub Desktop.
Haystack 2.0 - RAG Pipelines from scratch
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"d4dfda42872f4f77abbf66b97833c362": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_2c8ef68c44664ccda137d1cd91eb6f7f",
"IPY_MODEL_2cda6a3527b849db847184b88d1fd5b8",
"IPY_MODEL_b8f7d82157a7418f8eded2b65dbf401d"
],
"layout": "IPY_MODEL_f687f485cfd44886bc95c4b7c28fb3f1"
}
},
"2c8ef68c44664ccda137d1cd91eb6f7f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1e354853c0a644079a831b45b7d66c99",
"placeholder": "​",
"style": "IPY_MODEL_d4d449df1400492db4cd3f6428113990",
"value": "Ranking by BM25...: 100%"
}
},
"2cda6a3527b849db847184b88d1fd5b8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_30bc8c55858b4d009f436c4843418c02",
"max": 4,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_63341eb3676d42459fccd81842001637",
"value": 4
}
},
"b8f7d82157a7418f8eded2b65dbf401d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_33c14dd20b364bd09281bd925c450ca8",
"placeholder": "​",
"style": "IPY_MODEL_ff14949a23044e2081971907abf73dbb",
"value": " 4/4 [00:00<00:00, 159.78 docs/s]"
}
},
"f687f485cfd44886bc95c4b7c28fb3f1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1e354853c0a644079a831b45b7d66c99": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d4d449df1400492db4cd3f6428113990": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"30bc8c55858b4d009f436c4843418c02": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"63341eb3676d42459fccd81842001637": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"33c14dd20b364bd09281bd925c450ca8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ff14949a23044e2081971907abf73dbb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b7e95e57b21b4e5784b1e697984844ba": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_3b9f505818b74f09a778089354014982",
"IPY_MODEL_e96dcde2b0604a869f5dc34df231f524",
"IPY_MODEL_5af5301d74524d46a768ab554df74c84"
],
"layout": "IPY_MODEL_26486e60d6564fbd9fe7150cfc746af1"
}
},
"3b9f505818b74f09a778089354014982": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ef15806a667b402d96b45e1199be8130",
"placeholder": "​",
"style": "IPY_MODEL_f16d6ef7eeaa465fae0d79fc9e0f7027",
"value": "Ranking by BM25...: 100%"
}
},
"e96dcde2b0604a869f5dc34df231f524": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a132706aafec4f30954221e561bfca80",
"max": 4,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_f255fd94b7164c52b782dac65f7e62ec",
"value": 4
}
},
"5af5301d74524d46a768ab554df74c84": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1c7fc1ce7a414359a90f49cb60c25193",
"placeholder": "​",
"style": "IPY_MODEL_497b467a21804300a3c0a92897ca43c1",
"value": " 4/4 [00:00<00:00, 131.54 docs/s]"
}
},
"26486e60d6564fbd9fe7150cfc746af1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ef15806a667b402d96b45e1199be8130": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f16d6ef7eeaa465fae0d79fc9e0f7027": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"a132706aafec4f30954221e561bfca80": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f255fd94b7164c52b782dac65f7e62ec": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"1c7fc1ce7a414359a90f49cb60c25193": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"497b467a21804300a3c0a92897ca43c1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"be678df9ec144b288cb9ac1b9e86b1fb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_61e35495d0834901a91ebe21810e4d2c",
"IPY_MODEL_b289679b43fa48bdb7c7fe5329967675",
"IPY_MODEL_d41b91a377544abc836629e7a46a8cec"
],
"layout": "IPY_MODEL_81da0bfb51aa4536abd6da0a9b68e6e6"
}
},
"61e35495d0834901a91ebe21810e4d2c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3433f3c36cc1423a8ef99fcc7b5eec1d",
"placeholder": "​",
"style": "IPY_MODEL_f114e2182dcd4096a84c56bbe8eab1b0",
"value": "Ranking by BM25...: 100%"
}
},
"b289679b43fa48bdb7c7fe5329967675": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b2c1b1787aff478e9b647593bda28724",
"max": 4,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_f7cf9933c6af4fa08172bd116d437125",
"value": 4
}
},
"d41b91a377544abc836629e7a46a8cec": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_bf5dcae1b4fc42ecab2017129429d028",
"placeholder": "​",
"style": "IPY_MODEL_d1b52e9f589b4a27ad798461df285fbe",
"value": " 4/4 [00:00<00:00, 213.09 docs/s]"
}
},
"81da0bfb51aa4536abd6da0a9b68e6e6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3433f3c36cc1423a8ef99fcc7b5eec1d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f114e2182dcd4096a84c56bbe8eab1b0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b2c1b1787aff478e9b647593bda28724": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f7cf9933c6af4fa08172bd116d437125": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"bf5dcae1b4fc42ecab2017129429d028": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d1b52e9f589b4a27ad798461df285fbe": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": [
"*From https://www.zansara.dev/posts/2023-10-27-haystack-series-rag/*"
],
"metadata": {
"id": "EOw2UFz2_9xM"
}
},
{
"cell_type": "markdown",
"source": [
"# Install the libraries"
],
"metadata": {
"id": "n1DbBwkqX-ax"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "gixkExYMh9cA",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "22e5c020-353a-4fb2-f5d4-0ccb567a907e"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (1.0.9)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from langdetect) (1.16.0)\n",
"Requirement already satisfied: boilerpy3 in /usr/local/lib/python3.10/dist-packages (1.0.7)\n",
"Requirement already satisfied: transformers[sentencepiece,torch]==4.34.1 in /usr/local/lib/python3.10/dist-packages (4.34.1)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (3.13.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.17.3)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (1.23.5)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (23.2)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (6.0.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (2023.6.3)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (2.31.0)\n",
"Requirement already satisfied: tokenizers<0.15,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.14.1)\n",
"Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.4.1)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (4.66.1)\n",
"Requirement already satisfied: torch!=1.12.0,>=1.10 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (2.1.0+cu121)\n",
"Requirement already satisfied: accelerate>=0.20.3 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.26.1)\n",
"Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (0.1.99)\n",
"Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece,torch]==4.34.1) (3.20.3)\n",
"Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.3->transformers[sentencepiece,torch]==4.34.1) (5.9.5)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers[sentencepiece,torch]==4.34.1) (2023.6.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers[sentencepiece,torch]==4.34.1) (4.9.0)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (1.12)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (3.2.1)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (3.1.3)\n",
"Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (2.1.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece,torch]==4.34.1) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece,torch]==4.34.1) (3.6)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece,torch]==4.34.1) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[sentencepiece,torch]==4.34.1) (2023.11.17)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (2.1.3)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch!=1.12.0,>=1.10->transformers[sentencepiece,torch]==4.34.1) (1.3.0)\n",
"Requirement already satisfied: haystack-ai==2.0.0b5 in /usr/local/lib/python3.10/dist-packages (2.0.0b5)\n",
"Requirement already satisfied: boilerpy3 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (1.0.7)\n",
"Requirement already satisfied: haystack-bm25 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (1.0.2)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (3.1.3)\n",
"Requirement already satisfied: lazy-imports in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (0.3.1)\n",
"Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (10.1.0)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (3.2.1)\n",
"Requirement already satisfied: openai>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (1.8.0)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (1.5.3)\n",
"Requirement already satisfied: posthog in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (3.3.1)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (6.0.1)\n",
"Requirement already satisfied: tenacity in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (8.2.3)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (4.66.1)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.0.0b5) (4.9.0)\n",
"Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (3.7.1)\n",
"Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (1.7.0)\n",
"Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (0.26.0)\n",
"Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (1.10.13)\n",
"Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.0.0b5) (1.3.0)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from haystack-bm25->haystack-ai==2.0.0b5) (1.23.5)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->haystack-ai==2.0.0b5) (2.1.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->haystack-ai==2.0.0b5) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->haystack-ai==2.0.0b5) (2023.3.post1)\n",
"Requirement already satisfied: requests<3.0,>=2.7 in /usr/local/lib/python3.10/dist-packages (from posthog->haystack-ai==2.0.0b5) (2.31.0)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog->haystack-ai==2.0.0b5) (1.16.0)\n",
"Requirement already satisfied: monotonic>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog->haystack-ai==2.0.0b5) (1.6)\n",
"Requirement already satisfied: backoff>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from posthog->haystack-ai==2.0.0b5) (2.2.1)\n",
"Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai>=1.1.0->haystack-ai==2.0.0b5) (3.6)\n",
"Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai>=1.1.0->haystack-ai==2.0.0b5) (1.2.0)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai==2.0.0b5) (2023.11.17)\n",
"Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai==2.0.0b5) (1.0.2)\n",
"Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai==2.0.0b5) (0.14.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.7->posthog->haystack-ai==2.0.0b5) (3.3.2)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.7->posthog->haystack-ai==2.0.0b5) (2.0.7)\n"
]
}
],
"source": [
"# Install haystack & some deps\n",
"%pip install langdetect\n",
"%pip install boilerpy3\n",
"%pip install transformers[torch,sentencepiece]==4.34.1\n",
"%pip install haystack-ai==2.0.0b5"
]
},
{
"cell_type": "code",
"source": [
"# Get OpenAI API key\n",
"\n",
"import getpass\n",
"\n",
"api_key = getpass.getpass()"
],
"metadata": {
"id": "wzdNlKb2To-z",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "bdeed1bc-4471-4d0d-8d88-974246ff22ed"
},
"execution_count": null,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"··········\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Generators"
],
"metadata": {
"id": "Ubo3JEG-UnNf"
}
},
{
"cell_type": "code",
"source": [
"from haystack.components.generators import OpenAIGenerator\n",
"\n",
"generator = OpenAIGenerator(api_key=api_key)\n",
"\n",
"generator.run(prompt=\"What's the official language of France?\")"
],
"metadata": {
"id": "N-4JJYYai-i9",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "9693867c-c7e3-42b7-927f-f4aae49798d4"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'replies': ['The official language of France is French.'],\n",
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 8,\n",
" 'prompt_tokens': 15,\n",
" 'total_tokens': 23}}]}"
]
},
"metadata": {},
"execution_count": 48
}
]
},
{
"cell_type": "markdown",
"source": [
"# PromptBuilder"
],
"metadata": {
"id": "kF6LrxvHVp9b"
}
},
{
"cell_type": "code",
"source": [
"from haystack.components.builders import PromptBuilder\n",
"\n",
"prompt_builder = PromptBuilder(template=\"What's the official language of {{ country }}?\")\n",
"\n",
"prompt_builder.run(country=\"France\")"
],
"metadata": {
"id": "2u_50rAUjpod",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "35e81bf1-9c7c-4cd3-cfe6-de31d77c46ac"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'prompt': \"What's the official language of France?\"}"
]
},
"metadata": {},
"execution_count": 49
}
]
},
{
"cell_type": "markdown",
"source": [
"# A Simple Generative Pipeline"
],
"metadata": {
"id": "zfsbF9aqVtfX"
}
},
{
"cell_type": "code",
"source": [
"from haystack import Pipeline\n",
"from haystack.components.generators import OpenAIGenerator\n",
"from haystack.components.builders.prompt_builder import PromptBuilder\n",
"\n",
"pipe = Pipeline()\n",
"pipe.add_component(\"prompt_builder\", PromptBuilder(template=\"What's the official language of {{ country }}?\"))\n",
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n",
"pipe.connect(\"prompt_builder\", \"llm\")\n",
"\n",
"pipe.run({\"prompt_builder\": {\"country\": \"France\"}})"
],
"metadata": {
"id": "14qTTokskoWJ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "c85f60e7-60bc-407f-da90-c54929f35721"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'llm': {'replies': ['The official language of France is French.'],\n",
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 8,\n",
" 'prompt_tokens': 15,\n",
" 'total_tokens': 23}}]}}"
]
},
"metadata": {},
"execution_count": 50
}
]
},
{
"cell_type": "code",
"source": [
"pipe.draw(\"simple-llm-pipeline.png\")"
],
"metadata": {
"id": "NWDO5EUgVGhN"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Make the LLM cheat"
],
"metadata": {
"id": "5ELhDczXWYPq"
}
},
{
"cell_type": "code",
"source": [
"pipe.run({\"prompt_builder\": {\"country\": \"the Republic of Rose Island\"}})"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "oIEB_HfHWIFC",
"outputId": "772fe059-ec5f-4d25-ddad-973b1266510c"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'llm': {'replies': ['The official language of the Republic of Rose Island is Italian.'],\n",
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 12,\n",
" 'prompt_tokens': 19,\n",
" 'total_tokens': 31}}]}}"
]
},
"metadata": {},
"execution_count": 52
}
]
},
{
"cell_type": "code",
"source": [
"context_template = \"\"\"\n",
"Given the following information, answer the question.\n",
"Context: {{ context }}\n",
"Question: {{ question }}\n",
"\"\"\"\n",
"language_template = \"What's the official language of {{ country }}?\"\n",
"\n",
"pipe = Pipeline()\n",
"pipe.add_component(\"context_prompt\", PromptBuilder(template=context_template))\n",
"pipe.add_component(\"language_prompt\", PromptBuilder(template=language_template))\n",
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n",
"pipe.connect(\"language_prompt\", \"context_prompt.question\")\n",
"pipe.connect(\"context_prompt\", \"llm\")\n",
"\n",
"pipe.run({\n",
" \"context_prompt\": {\"context\": \"Rose Island had its own government, currency, post office, and commercial establishments, and the official language was Esperanto.\"},\n",
" \"language_prompt\": {\"country\": \"the Republic of Rose Island\"}\n",
"})"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6UaQ8n03s2rO",
"outputId": "7769e4d6-32e1-419b-ce49-523c3a778b6c"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'llm': {'replies': ['The official language of the Republic of Rose Island is Esperanto.'],\n",
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 13,\n",
" 'prompt_tokens': 57,\n",
" 'total_tokens': 70}}]}}"
]
},
"metadata": {},
"execution_count": 53
}
]
},
{
"cell_type": "code",
"source": [
"pipe.draw(\"double-prompt-builder-pipeline.png\")"
],
"metadata": {
"id": "ue-_-skjWh-Q"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"template = \"\"\"\n",
"Given the following information, answer the question.\n",
"Context: {{ context }}\n",
"Question: What's the official language of {{ country }}?\n",
"\"\"\"\n",
"pipe = Pipeline()\n",
"pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n",
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n",
"pipe.connect(\"prompt_builder\", \"llm\")\n",
"\n",
"pipe.run({\n",
" \"prompt_builder\": {\n",
" \"context\": \"Rose Island had its own government, currency, post office, and commercial establishments, and the official language was Esperanto.\",\n",
" \"country\": \"the Republic of Rose Island\"\n",
" }\n",
"})"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Gc2w5MD1WzzB",
"outputId": "f02b2196-90c6-4dde-f3aa-b106d39cd94c"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'llm': {'replies': ['The official language of the Republic of Rose Island is Esperanto.'],\n",
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 13,\n",
" 'prompt_tokens': 57,\n",
" 'total_tokens': 70}}]}}"
]
},
"metadata": {},
"execution_count": 55
}
]
},
{
"cell_type": "code",
"source": [
"pipe.draw(\"advanced-prompt-builder-pipeline.png\")"
],
"metadata": {
"id": "-simX98cW8SL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Retrieving the context"
],
"metadata": {
"id": "3V5sCS1bUqzS"
}
},
{
"cell_type": "code",
"source": [
"from haystack.dataclasses import Document\n",
"from haystack.document_stores.in_memory import InMemoryDocumentStore\n",
"\n",
"documents = [\n",
" Document(content=\"German is the the official language of Germany.\"),\n",
" Document(content=\"The capital of France is Paris, and its official language is French.\"),\n",
" Document(content=\"Italy recognizes a few official languages, but the most widespread one is Italian.\"),\n",
" Document(content=\"Esperanto has been adopted as official language for some microstates as well, such as the Republic of Rose Island, a short-lived microstate built on a sea platform in the Adriatic Sea.\")\n",
"]\n",
"docstore = InMemoryDocumentStore()\n",
"docstore.write_documents(documents=documents)\n",
"\n",
"docstore.filter_documents()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "j0s5v7VCXhFo",
"outputId": "c48294bf-13a1-4f1d-8321-1996ede51f21"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[Document(id=a9e71bb14b0adc6a4e7aaf715129fdae95ff01fa764d43a3eccb61794eb5eb5b, content: 'German is the the official language of Germany.'),\n",
" Document(id=72213a7ad56744ce248bf8608fad642859e5ec4b25519321f9eaa33fe1205fa5, content: 'The capital of France is Paris, and its official language is French.'),\n",
" Document(id=8101f840db7cf3a7567b07ca8cc52ac36be8881fb8ac400c4a6c16653dfa1a47, content: 'Italy recognizes a few official languages, but the most widespread one is Italian.'),\n",
" Document(id=eb573d87abd5cbd94f620c6cbce316af21a8f35e594d02a21b387d857fc4441b, content: 'Esperanto has been adopted as official language for some microstates as well, such as the Republic o...')]"
]
},
"metadata": {},
"execution_count": 57
}
]
},
{
"cell_type": "code",
"source": [
"from haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n",
"\n",
"retriever = InMemoryBM25Retriever(document_store=docstore)"
],
"metadata": {
"id": "hxpU-H_gXiNt"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"retriever.run(query=\"Rose Island\", top_k=1)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 84,
"referenced_widgets": [
"d4dfda42872f4f77abbf66b97833c362",
"2c8ef68c44664ccda137d1cd91eb6f7f",
"2cda6a3527b849db847184b88d1fd5b8",
"b8f7d82157a7418f8eded2b65dbf401d",
"f687f485cfd44886bc95c4b7c28fb3f1",
"1e354853c0a644079a831b45b7d66c99",
"d4d449df1400492db4cd3f6428113990",
"30bc8c55858b4d009f436c4843418c02",
"63341eb3676d42459fccd81842001637",
"33c14dd20b364bd09281bd925c450ca8",
"ff14949a23044e2081971907abf73dbb"
]
},
"id": "J6va4mCuXiIU",
"outputId": "05d3abc9-0e67-4f48-ff58-e4a1025accc6"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Ranking by BM25...: 0%| | 0/4 [00:00<?, ? docs/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "d4dfda42872f4f77abbf66b97833c362"
}
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'documents': [Document(id=eb573d87abd5cbd94f620c6cbce316af21a8f35e594d02a21b387d857fc4441b, content: 'Esperanto has been adopted as official language for some microstates as well, such as the Republic o...', score: 1.1925645254031016)]}"
]
},
"metadata": {},
"execution_count": 59
}
]
},
{
"cell_type": "code",
"source": [
"retriever.run(query=\"Rose Island\", top_k=3)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 84,
"referenced_widgets": [
"b7e95e57b21b4e5784b1e697984844ba",
"3b9f505818b74f09a778089354014982",
"e96dcde2b0604a869f5dc34df231f524",
"5af5301d74524d46a768ab554df74c84",
"26486e60d6564fbd9fe7150cfc746af1",
"ef15806a667b402d96b45e1199be8130",
"f16d6ef7eeaa465fae0d79fc9e0f7027",
"a132706aafec4f30954221e561bfca80",
"f255fd94b7164c52b782dac65f7e62ec",
"1c7fc1ce7a414359a90f49cb60c25193",
"497b467a21804300a3c0a92897ca43c1"
]
},
"id": "iWpwPT4JXiCY",
"outputId": "ce74681d-3a58-484a-fb93-1c9eef108f1c"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Ranking by BM25...: 0%| | 0/4 [00:00<?, ? docs/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "b7e95e57b21b4e5784b1e697984844ba"
}
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'documents': [Document(id=eb573d87abd5cbd94f620c6cbce316af21a8f35e594d02a21b387d857fc4441b, content: 'Esperanto has been adopted as official language for some microstates as well, such as the Republic o...', score: 1.1925645254031016)]}"
]
},
"metadata": {},
"execution_count": 60
}
]
},
{
"cell_type": "markdown",
"source": [
"# Our first RAG Pipeline"
],
"metadata": {
"id": "0aRCL7YyX70Q"
}
},
{
"cell_type": "code",
"source": [
"template = \"\"\"\n",
"Given the following information, answer the question.\n",
"\n",
"Context:\n",
"{% for document in documents %}\n",
" {{ document.content }}\n",
"{% endfor %}\n",
"\n",
"Question: What's the official language of {{ country }}?\n",
"\"\"\"\n",
"pipe = Pipeline()\n",
"\n",
"pipe.add_component(\"retriever\", InMemoryBM25Retriever(document_store=docstore))\n",
"pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n",
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n",
"pipe.connect(\"retriever\", \"prompt_builder.documents\")\n",
"pipe.connect(\"prompt_builder\", \"llm\")\n",
"\n",
"country = \"the Republic of Rose Island\"\n",
"pipe.run({\n",
" \"retriever\": {\"query\": country},\n",
" \"prompt_builder\": {\"country\": country}\n",
"})"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 171,
"referenced_widgets": [
"be678df9ec144b288cb9ac1b9e86b1fb",
"61e35495d0834901a91ebe21810e4d2c",
"b289679b43fa48bdb7c7fe5329967675",
"d41b91a377544abc836629e7a46a8cec",
"81da0bfb51aa4536abd6da0a9b68e6e6",
"3433f3c36cc1423a8ef99fcc7b5eec1d",
"f114e2182dcd4096a84c56bbe8eab1b0",
"b2c1b1787aff478e9b647593bda28724",
"f7cf9933c6af4fa08172bd116d437125",
"bf5dcae1b4fc42ecab2017129429d028",
"d1b52e9f589b4a27ad798461df285fbe"
]
},
"id": "jFmhoTfNXh7e",
"outputId": "05245674-90c7-45f5-d6b8-5b566352711f"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Ranking by BM25...: 0%| | 0/4 [00:00<?, ? docs/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "be678df9ec144b288cb9ac1b9e86b1fb"
}
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'llm': {'replies': ['The official language of the Republic of Rose Island is Esperanto.'],\n",
" 'meta': [{'model': 'gpt-3.5-turbo-0613',\n",
" 'index': 0,\n",
" 'finish_reason': 'stop',\n",
" 'usage': {'completion_tokens': 13,\n",
" 'prompt_tokens': 114,\n",
" 'total_tokens': 127}}]}}"
]
},
"metadata": {},
"execution_count": 61
}
]
},
{
"cell_type": "code",
"source": [
"pipe.draw(\"simple-rag-pipeline.png\")"
],
"metadata": {
"id": "XkS0ozuZXhfV"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# Going to production"
],
"metadata": {
"id": "iGg33w2pmHr6"
}
},
{
"cell_type": "markdown",
"source": [
"## Setting up Elasticsearch 8 (MANUAL STEPS REQUIRED)"
],
"metadata": {
"id": "nHF3oUcUznyn"
}
},
{
"cell_type": "code",
"source": [
"%%bash\n",
"\n",
"rm -rf elasticsearch*\n",
"wget -q https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.8.0-linux-x86_64.tar.gz\n",
"tar -xzf elasticsearch-8.8.0-linux-x86_64.tar.gz\n",
"sudo chown -R daemon:daemon elasticsearch-8.8.0/\n",
"umount /sys/fs/cgroup\n",
"apt install cgroup-tools"
],
"metadata": {
"id": "S6EuX-CDnBZh"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"%%bash --bg\n",
"\n",
"sudo -H -u daemon elasticsearch-8.8.0/bin/elasticsearch"
],
"metadata": {
"id": "zefnKPrawP0S"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# This wait is important, since it takes some time for the Elasticsearch instance to load\n",
"import os\n",
"import time\n",
"time.sleep(60)"
],
"metadata": {
"id": "xaAAUNrywRgx"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!ps -ef | grep elastic"
],
"metadata": {
"id": "nOOr_HbFwSoa"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Type 'y' when requested\n",
"\n",
"!/content/elasticsearch-8.8.0/bin/elasticsearch-setup-passwords auto -url \"https://localhost:9200\""
],
"metadata": {
"id": "1JmlziR8wWYD"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Copy the `elastic` password from the bottom of the previous cell's output and export it as an env var,\n",
"# replacing the example value below. The relevant output looks like this:\n",
"#\n",
"# Changed password for user elastic\n",
"# PASSWORD elastic = zkjH4RYRZbUjJk9xUYEV\n",
"#\n",
"# You will need this password again in the next cell.\n",
"\n",
"os.environ[\"ELASTICSEARCH_PASSWORD\"] = \"rsAhNQx4pI64TdjbFYL6\""
],
"metadata": {
"id": "QzLZGbKhwjWk"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Test the password: paste it at the prompt when requested and check that the request succeeds\n",
"!curl --cacert /content/elasticsearch-8.8.0/config/certs/http_ca.crt -u elastic -H 'Content-Type: application/json' -XGET https://localhost:9200/?pretty=true"
],
"metadata": {
"id": "fb_byjShwfR1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Install the Haystack Elasticsearch integration"
],
"metadata": {
"id": "MgGM2qKXzvUA"
}
},
{
"cell_type": "code",
"source": [
"%pip install elasticsearch-haystack==0.1.2"
],
"metadata": {
"id": "xNm-mSN8mPFd"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Build the pipeline"
],
"metadata": {
"id": "hTTmopFX-jse"
}
},
{
"cell_type": "code",
"source": [
"from elasticsearch_haystack.document_store import ElasticsearchDocumentStore\n",
"\n",
"# Get the host where Elasticsearch is running, default to localhost\n",
"host = os.environ.get(\"ELASTICSEARCH_HOST\", \"https://localhost:9200\")\n",
"user = \"elastic\"\n",
"pwd = os.environ[\"ELASTICSEARCH_PASSWORD\"] # If this fails, make sure you ran the cell that sets ELASTICSEARCH_PASSWORD during ES setup.\n",
"\n",
"docstore = ElasticsearchDocumentStore(hosts=[host], basic_auth=(user, pwd), ca_certs=\"/content/elasticsearch-8.8.0/config/certs/http_ca.crt\")"
],
"metadata": {
"id": "QRVh5wAHnxJz"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from haystack.document_stores import DuplicatePolicy\n",
"documents = [\n",
" Document(content=\"German is the the official language of Germany.\"),\n",
" Document(content=\"The capital of France is Paris, and its official language is French.\"),\n",
" Document(content=\"Italy recognizes a few official languages, but the most widespread one is Italian.\"),\n",
" Document(content=\"Esperanto has been adopted as official language for some microstates as well, such as the Republic of Rose Island, a short-lived microstate built on a sea platform in the Adriatic Sea.\")\n",
"]\n",
"docstore.write_documents(documents=documents, policy=DuplicatePolicy.OVERWRITE)\n",
"\n",
"docstore.filter_documents()"
],
"metadata": {
"id": "lAlQ0wiL1RfN"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from elasticsearch_haystack.bm25_retriever import ElasticsearchBM25Retriever\n",
"\n",
"template = \"\"\"\n",
"Given the following information, answer the question.\n",
"\n",
"Context:\n",
"{% for document in documents %}\n",
" {{ document.content }}\n",
"{% endfor %}\n",
"\n",
"Question: What's the official language of {{ country }}?\n",
"\"\"\"\n",
"\n",
"pipe = Pipeline()\n",
"pipe.add_component(\"retriever\", ElasticsearchBM25Retriever(document_store=docstore))\n",
"pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n",
"pipe.add_component(\"llm\", OpenAIGenerator(api_key=api_key))\n",
"pipe.connect(\"retriever\", \"prompt_builder.documents\")\n",
"pipe.connect(\"prompt_builder\", \"llm\")\n",
"\n",
"pipe.draw(\"elasticsearch-rag-pipeline.png\")\n",
"\n",
"country = \"the Republic of Rose Island\"\n",
"pipe.run({\n",
" \"retriever\": {\"query\": country},\n",
" \"prompt_builder\": {\"country\": country}\n",
"})"
],
"metadata": {
"id": "rZhA_rK51EoJ"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment