Generating synthetic questions and answers for wandbot
@morganmcg1 · Created May 15, 2023 19:00
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"In this notebook we will automatically generate a set of evaluation questions based on wandb docs"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import wandb\n",
"import re\n",
"import openai\n",
"import os\n",
"from tqdm.auto import tqdm\n",
"import time\n",
"\n",
"from langchain.docstore.document import Document\n",
"from langchain.document_loaders import UnstructuredMarkdownLoader\n",
"from langchain.text_splitter import MarkdownTextSplitter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# !git clone https://github.com/wandb/docodile.git"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"PROJECT = \"test\" \n",
"ENTITY = \"wandbot\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Please enter password in the VS Code prompt at the top of your VS Code window!\n",
"OpenAI API key configured\n"
]
}
],
"source": [
"from getpass import getpass\n",
"\n",
"def get_openai_key():\n",
" if os.getenv(\"OPENAI_API_KEY\") is None:\n",
" if any(['VSCODE' in x for x in os.environ.keys()]):\n",
" print('Please enter password in the VS Code prompt at the top of your VS Code window!')\n",
" os.environ[\"OPENAI_API_KEY\"] = getpass(\"Paste your OpenAI key from: https://platform.openai.com/account/api-keys\\n\")\n",
"\n",
" assert os.getenv(\"OPENAI_API_KEY\", \"\").startswith(\"sk-\"), \"This doesn't look like a valid OpenAI API key\"\n",
" print(\"OpenAI API key configured\")\n",
"\n",
"get_openai_key()\n",
"\n",
"openai.api_key = \"XXX\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Generate Synthetic User Questions using ChatGPT"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# This function is used to find all the markdown files in a directory and return it's content and path\n",
"\n",
"def find_md_files(directory):\n",
" md_files = []\n",
" for root, dirs, files in os.walk(directory):\n",
" for file in files:\n",
" if file.endswith(\".md\"):\n",
" file_path = os.path.join(root, file)\n",
" with open(file_path, 'r', encoding='utf-8') as md_file:\n",
" content = md_file.read()\n",
" md_files.append((file_path, content))\n",
" return md_files"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"documents = find_md_files('../docodile/docs/')"
]
},
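{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick peek (illustrative, not part of the original run) at the `(path, content)` tuples that `find_md_files` returns:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Each entry is a (file_path, file_content) tuple; show one path and the first 200 characters.\n",
"path, content = documents[0]\n",
"print(path)\n",
"print(content[:200])"
]
},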
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"288"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(documents)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"random.shuffle(documents)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: W&B API key is configured. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8befb08109b44051a93f3eff4f5d96d7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016688531250110827, max=1.0…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Tracking run with wandb version 0.15.2"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run data is saved locally in <code>/Users/morganmcguire/ML/wandb_examples/wandbot_synth/wandb/run-20230514_222654-apym5p2o</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Syncing run <strong><a href='https://wandb.ai/wandbot/test/runs/apym5p2o' target=\"_blank\">question_gen_test</a></strong> to <a href='https://wandb.ai/wandbot/test' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View project at <a href='https://wandb.ai/wandbot/test' target=\"_blank\">https://wandb.ai/wandbot/test</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run at <a href='https://wandb.ai/wandbot/test/runs/apym5p2o' target=\"_blank\">https://wandb.ai/wandbot/test/runs/apym5p2o</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a6fbb6f2766749f29b7775198de29bb7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/50 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 5288 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 5288 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 5288 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 5288 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 5288 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4567 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4567 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4567 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4567 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4567 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4576 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4576 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4576 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4576 tokens. Please reduce the length of the messages.\n",
"retrying: This model's maximum context length is 4097 tokens. However, your messages resulted in 4576 tokens. Please reduce the length of the messages.\n"
]
}
],
"source": [
"from wandb.integration.openai import autolog\n",
"\n",
"autolog({\"project\":PROJECT, \"entity\":ENTITY, \"name\":\"question_gen_test\"})\n",
"\n",
"res = []\n",
"for i in tqdm(range(len(documents[:50]))):\n",
" for i_r, _ in enumerate(range(5)):\n",
" try: \n",
" source = documents[i][0]\n",
" doc = documents[i][1]\n",
" generation_prompt = 'You are a tester of W&B support bot.\\n' +\\\n",
" 'Your goal is to generate 10 questions that can be answered by reading a document in W&B documentation.' +\\\n",
" 'Given the document, you need to imagine a hypothetical question from a user that has not read the document before.\\n' +\\\n",
" 'It should be possible to answer the question by reading and reasoning over the document.\\n' +\\\n",
" 'This question should be feasible to come from a user learning about wandb.\\n' +\\\n",
" 'The question should be answerable by a human.\\n' +\\\n",
" 'Each question should be unique and not a duplicate of another question.\\n' +\\\n",
" 'Each question should be separated by a new line.\\n' +\\\n",
" 'Let\\'s start!\\n\\n' +\\\n",
" 'Document:\\n' + doc + '\\n' +\\\n",
" 'Questions:'\n",
"\n",
" response = openai.ChatCompletion.create(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
" {\"role\": \"user\", \"content\": generation_prompt},\n",
" ]\n",
" )\n",
" generation = response.choices[0].message.content\n",
" res.append({\n",
" 'prompt': generation_prompt,\n",
" 'document': source,\n",
" 'question': generation\n",
" })\n",
" # we don't need to retry if we get here\n",
" break\n",
"\n",
" except Exception as e:\n",
" # wait for 1 minute\n",
" time.sleep(10)\n",
" print(f'retrying: {e}')\n",
"\n",
" # if i == 2: break # for testing\n",
"\n",
"# wandb.finish()"
]
},
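{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"The retries above show several documents overflowing the model's 4,097-token context window. Below is a minimal sketch (not part of the original run) of how the `MarkdownTextSplitter` imported at the top could chunk long documents before question generation; the chunk sizes are illustrative assumptions, not tuned values:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical pre-processing step: split long markdown files into chunks that fit the context window.\n",
"# chunk_size is measured in characters; 6000 characters is an assumed budget.\n",
"splitter = MarkdownTextSplitter(chunk_size=6000, chunk_overlap=200)\n",
"\n",
"chunked_documents = []\n",
"for path, content in documents:\n",
"    for chunk in splitter.split_text(content):\n",
"        chunked_documents.append((path, chunk))\n",
"\n",
"len(chunked_documents)"
]
},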
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'prompt': \"You are a tester of W&B support bot.\\nYour goal is to generate 10 questions that can be answered by reading a document in W&B documentation.Given the document, you need to imagine a hypothetical question from a user that has not read the document before.\\nIt should be possible to answer the question by reading and reasoning over the document.\\nThis question should be feasible to come from a user learning about wandb.\\nThe question should be answerable by a human.\\nEach question should be unique and not a duplicate of another question.\\nEach question should be separated by a new line.\\nLet's start!\\n\\nDocument:\\n# wandb disabled\\n\\n**Usage**\\n\\n`wandb disabled [OPTIONS]`\\n\\n**Summary**\\n\\nDisable W&B.\\n\\n**Options**\\n\\n| **Option** | **Description** |\\n| :--- | :--- |\\n| --service | Disable W&B service [default: True] |\\n| --help | Show this message and exit. |\\n\\n\\nQuestions:\",\n",
" 'document': '../docodile/docs/ref/cli/wandb-disabled.md',\n",
" 'question': '1. What does the `wandb disabled` command in W&B do?\\n2. How do I disable the W&B service using the `wandb disabled` option?\\n3. Is it possible to disable specific features of W&B using the `wandb disabled` command?\\n4. Can I use the `wandb disabled` command to temporarily pause W&B logging during a particular process?\\n5. What happens to my previously logged data in W&B after I use the `wandb disabled` command?\\n6. Is it recommended to use the `wandb disabled` command frequently or only in specific situations? \\n7. Can the `wandb disabled` command be used with other W&B commands in a single script?\\n8. Does disabling W&B using the `wandb disabled` option have an impact on the performance of my models or scripts?\\n9. How can I enable W&B logging again after using the `wandb disabled` command?\\n10. Can I automate the disabling and enabling of W&B using the `wandb disabled` command through a script?'},\n",
" {'prompt': 'You are a tester of W&B support bot.\\nYour goal is to generate 10 questions that can be answered by reading a document in W&B documentation.Given the document, you need to imagine a hypothetical question from a user that has not read the document before.\\nIt should be possible to answer the question by reading and reasoning over the document.\\nThis question should be feasible to come from a user learning about wandb.\\nThe question should be answerable by a human.\\nEach question should be unique and not a duplicate of another question.\\nEach question should be separated by a new line.\\nLet\\'s start!\\n\\nDocument:\\n---\\ndescription: Track machine learning experiments with W&B.\\nslug: /guides/track\\n---\\n\\n# Track Experiments\\n\\n<head>\\n <title>Track Machine Learning and Deep Learning Experiments.</title>\\n</head>\\n\\nUse the W&B Python Library to track machine learning experiments with a few lines of code. You can then review the results in an [interactive dashboard](app.md) or export your data to Python for programmatic access using our [Public API](../../ref/python/public-api/README.md). Quickly find and [reproduce machine learning experiments with W&B](./reproduce-experiments.md).\\n\\nUtilize W&B Integrations if you use use popular frameworks such as [PyTorch](../integrations/pytorch.md), [Keras](../integrations/keras.md), or [Scikit](../integrations/scikit.md). See our [Integration guides](../integrations/intro.md) for a for a full list of integrations and information on how to add W&B to your code.\\n\\n## How it works\\n\\nW&B Experiments are composed of the following building blocks:\\n\\n1. [**`wandb.init()`**](./launch.md): Initialize a new run at the top of your script. This returns a `Run` object and creates a local directory where all logs and files are saved, then streamed asynchronously to a W&B server. If you want to use a private server instead of our hosted cloud server, we offer [Self-Hosting](../hosting/intro.md).\\n2. [**`wandb.config`**](./config.md): Save a dictionary of hyperparameters such as learning rate or model type. The model settings you capture in config are useful later to organize and query your results.\\n3. [**`wandb.log()`**](./log/intro.md): Log metrics over time in a training loop, such as accuracy and loss. By default, when you call `wandb.log` it appends a new step to the `history` object and updates the `summary` object.\\n * `history`: An array of dictionary-like objects that tracks metrics over time. These time series values are shown as default line plots in the UI.\\n * `summary`: By default, the final value of a metric logged with wandb.log(). You can set the summary for a metric manually to capture the highest accuracy or lowest loss instead of the final value. These values are used in the table, and plots that compare runs — for example, you could visualize at the final accuracy for all runs in your project.\\n4. [**`wandb.log_artifact`**](../../ref/python/artifact.md): Save outputs of a run, like the model weights or a table of predictions. This lets you track not just model training, but all the pipeline steps that affect the final model.\\n\\nThe proceeding pseudocode demonstrates a common W&B Experiment tracking workflow:\\n\\n```python\\n# Flexible integration for any Python script\\nimport wandb\\n\\n# 1. Start a W&B Run\\nwandb.init(project=\\'my-project-name\\')\\n\\n# 2. 
Save mode inputs and hyperparameters\\nconfig = wandb.config\\nconfig.learning_rate = 0.01\\n\\n# Set up model and data\\nmodel, dataloader = get_model(), get_data()\\n\\n# Model training goes here\\n\\n# 3. Log metrics over time to visualize performance\\nwandb.log({\"loss\": loss})\\n\\n# 4. Log an artifact to W&B\\nwandb.log_artifact(model)\\n\\n```\\n\\n## How to get started\\n\\nDepending on your use case, explore the following resources to get started with W&B Experiments:\\n\\n* If this is your first time using W&B Experiments, we recommend you read the Quick Start. The [Quickstart](../../quickstart.md) walks you through setting up your first experiment.\\n* Explore topics about Experiments in the W&B Developer Guide such as:\\n * Create an experiment\\n * Configure experiments\\n * Log data from experiments\\n * View results from experiments\\n* Explore the [W&B Python Library](../../ref/python/README.md) within the [W&B API Reference Guide](../../ref/README.md).\\n\\n\\n\\n\\n\\n\\n\\n\\nQuestions:',\n",
" 'document': '../docodile/docs/guides/track/intro.md',\n",
" 'question': '1. What is the W&B Python Library used for?\\n2. How can I review the results of my machine learning experiments tracked with W&B?\\n3. Can I export the data from my experiments tracked with W&B to Python?\\n4. Are there any integrations for popular frameworks available in W&B?\\n5. What is the purpose of `wandb.init()`?\\n6. What is the purpose of `wandb.config`?\\n7. How can I log metrics over time in a training loop with W&B?\\n8. What is the purpose of `wandb.log_artifact`?\\n9. What are the building blocks of W&B Experiments?\\n10. Where can I find resources to get started with W&B Experiments?'},\n",
" {'prompt': \"You are a tester of W&B support bot.\\nYour goal is to generate 10 questions that can be answered by reading a document in W&B documentation.Given the document, you need to imagine a hypothetical question from a user that has not read the document before.\\nIt should be possible to answer the question by reading and reasoning over the document.\\nThis question should be feasible to come from a user learning about wandb.\\nThe question should be answerable by a human.\\nEach question should be unique and not a duplicate of another question.\\nEach question should be separated by a new line.\\nLet's start!\\n\\nDocument:\\n# Scatter Plot\\n\\nUse the scatter plot to compare multiple runs and visualize how your experiments are performing. We've added some customizable features:\\n\\n1. Plot a line along the min, max, and average\\n2. Custom metadata tooltips\\n3. Control point colors \\n4. Set axes ranges\\n5. Switch axes to log scale\\n\\nHere’s an example of validation accuracy of different models over a couple of weeks of experimentation. The tooltip is customized to include the batch size and dropout as well as the values on the axes. There’s also a line plotting the running average of validation accuracy. \\n[See a live example →](https://app.wandb.ai/l2k2/l2k/reports?view=carey%2FScatter%20Plot)\\n\\n![](https://paper-attachments.dropbox.com/s_9D642C56E99751C2C061E55EAAB63359266180D2F6A31D97691B25896D2271FC_1579031258748_image.png)\\n\\n## Common Questions\\n\\n### Is it possible to plot the max of a metric rather than plot step by step?\\n\\nThe best way to do this is to create a Scatter Plot of the metric, go into the Edit menu, and select Annotations. From there you can plot the running max of the values\\n\\n\\nQuestions:\",\n",
" 'document': '../docodile/docs/guides/app/features/panels/scatter-plot.md',\n",
" 'question': '1. How can the scatter plot be used to compare multiple runs of experiments?\\n2. What customizable features are available when using the scatter plot?\\n3. Can custom metadata tooltips be added to the scatter plot? If so, how?\\n4. Is it possible to control the colors of the points on the scatter plot? \\n5. How can the axes ranges be set on the scatter plot? \\n6. Can the axes be switched to a log scale on the scatter plot? If so, how?\\n7. Can the scatter plot show the running average of a metric? \\n8. Is there an example of how to use the scatter plot with validation accuracy data from different models?\\n9. What is the purpose of the line plotted on the validation accuracy scatter plot example?\\n10. How can the running max of a metric be plotted on the scatter plot?'}]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res[:3]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# we will now split each generation in res into a list of questions based on the new line character and flatten the resulting list\n",
"res = [{'prompt': x['prompt'], 'document': x['document'], 'question': y} for x in res for y in x['question'].split('\\n')]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# now let's remove the numeric characters, point and space at the beginning of each question\n",
"for i in range(len(res)):\n",
" res[i]['question'] = re.sub(r'^[\\d. ]+', '', res[i]['question'])\n"
]
},
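{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick illustrative check (not part of the original run) of what the cleanup regex does to a numbered question:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# '^[\\\\d. ]+' matches any leading run of digits, dots, and spaces, e.g. the '1. ' prefix.\n",
"re.sub(r'^[\\d. ]+', '', '1. What does the `wandb disabled` command in W&B do?')"
]
},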
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"What does the `wandb disabled` command in W&B do?\n",
"How do I disable the W&B service using the `wandb disabled` option?\n",
"Is it possible to disable specific features of W&B using the `wandb disabled` command?\n",
"Can I use the `wandb disabled` command to temporarily pause W&B logging during a particular process?\n",
"What happens to my previously logged data in W&B after I use the `wandb disabled` command?\n",
"Is it recommended to use the `wandb disabled` command frequently or only in specific situations? \n",
"Can the `wandb disabled` command be used with other W&B commands in a single script?\n",
"Does disabling W&B using the `wandb disabled` option have an impact on the performance of my models or scripts?\n",
"How can I enable W&B logging again after using the `wandb disabled` command?\n",
"Can I automate the disabling and enabling of W&B using the `wandb disabled` command through a script?\n",
"What is the W&B Python Library used for?\n",
"How can I review the results of my machine learning experiments tracked with W&B?\n",
"Can I export the data from my experiments tracked with W&B to Python?\n",
"Are there any integrations for popular frameworks available in W&B?\n",
"What is the purpose of `wandb.init()`?\n",
"What is the purpose of `wandb.config`?\n",
"How can I log metrics over time in a training loop with W&B?\n",
"What is the purpose of `wandb.log_artifact`?\n",
"What are the building blocks of W&B Experiments?\n",
"Where can I find resources to get started with W&B Experiments?\n",
"How can the scatter plot be used to compare multiple runs of experiments?\n",
"What customizable features are available when using the scatter plot?\n"
]
}
],
"source": [
"qs = [x['question'] for x in res]\n",
"for i,q in enumerate(qs):\n",
" print(q)\n",
" if i > 20: break"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>prompt</th>\n",
" <th>document</th>\n",
" <th>question</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>You are a tester of W&amp;B support bot.\\nYour goa...</td>\n",
" <td>../docodile/docs/ref/cli/wandb-disabled.md</td>\n",
" <td>What does the `wandb disabled` command in W&amp;B do?</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>You are a tester of W&amp;B support bot.\\nYour goa...</td>\n",
" <td>../docodile/docs/ref/cli/wandb-disabled.md</td>\n",
" <td>How do I disable the W&amp;B service using the `wa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>You are a tester of W&amp;B support bot.\\nYour goa...</td>\n",
" <td>../docodile/docs/ref/cli/wandb-disabled.md</td>\n",
" <td>Is it possible to disable specific features of...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>You are a tester of W&amp;B support bot.\\nYour goa...</td>\n",
" <td>../docodile/docs/ref/cli/wandb-disabled.md</td>\n",
" <td>Can I use the `wandb disabled` command to temp...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>You are a tester of W&amp;B support bot.\\nYour goa...</td>\n",
" <td>../docodile/docs/ref/cli/wandb-disabled.md</td>\n",
" <td>What happens to my previously logged data in W...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" prompt \\\n",
"0 You are a tester of W&B support bot.\\nYour goa... \n",
"1 You are a tester of W&B support bot.\\nYour goa... \n",
"2 You are a tester of W&B support bot.\\nYour goa... \n",
"3 You are a tester of W&B support bot.\\nYour goa... \n",
"4 You are a tester of W&B support bot.\\nYour goa... \n",
"\n",
" document \\\n",
"0 ../docodile/docs/ref/cli/wandb-disabled.md \n",
"1 ../docodile/docs/ref/cli/wandb-disabled.md \n",
"2 ../docodile/docs/ref/cli/wandb-disabled.md \n",
"3 ../docodile/docs/ref/cli/wandb-disabled.md \n",
"4 ../docodile/docs/ref/cli/wandb-disabled.md \n",
"\n",
" question \n",
"0 What does the `wandb disabled` command in W&B do? \n",
"1 How do I disable the W&B service using the `wa... \n",
"2 Is it possible to disable specific features of... \n",
"3 Can I use the `wandb disabled` command to temp... \n",
"4 What happens to my previously logged data in W... "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame(res)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv('sythetic-user-questions_2023-05-14.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"435"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# with wandb.init(project='wandb_bot_eval', entity='wandbot'):\n",
"# wandb.log({'generated_questions_table': wandb.Table(dataframe=res)})"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Answer Questions with WandBot"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"from typing import Any, Dict, List\n",
"\n",
"import wandb\n",
"from langchain import LLMChain\n",
"from langchain.chains import HypotheticalDocumentEmbedder, RetrievalQAWithSourcesChain\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
")\n",
"from langchain.schema import Document\n",
"from langchain.vectorstores import FAISS\n",
"from langchain.vectorstores.base import VectorStoreRetriever\n",
"from langchain.chains import HypotheticalDocumentEmbedder, RetrievalQAWithSourcesChain\n",
"\n",
"class VectorStoreRetrieverWithScore(VectorStoreRetriever):\n",
" def get_relevant_documents(self, query: str) -> List[Document]:\n",
" if self.search_type == \"similarity\":\n",
" docs_and_scores = self.vectorstore.similarity_search_with_score(\n",
" query, **self.search_kwargs\n",
" )\n",
" docs = []\n",
" for doc, score in docs_and_scores:\n",
" doc.metadata[\"score\"] = score\n",
" docs.append(doc)\n",
" elif self.search_type == \"mmr\":\n",
" docs = self.vectorstore.max_marginal_relevance_search(\n",
" query, **self.search_kwargs\n",
" )\n",
" else:\n",
" raise ValueError(f\"search_type of {self.search_type} not allowed.\")\n",
" return docs\n",
"\n",
"\n",
"class FAISSWithScore(FAISS):\n",
" def as_retriever(self) -> VectorStoreRetrieverWithScore:\n",
" return VectorStoreRetrieverWithScore(\n",
" vectorstore=self,\n",
" search_type=\"similarity\",\n",
" search_kwargs={\"k\": 10},\n",
" )\n",
"\n",
"\n",
"class RetrievalQAWithSourcesChainWithScore(RetrievalQAWithSourcesChain):\n",
" reduce_k_below_max_tokens: bool = True\n",
" max_tokens_limit: int = 2816\n",
"\n",
" def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]:\n",
" question = inputs[self.question_key]\n",
" docs = self.retriever.get_relevant_documents(question)\n",
" return self._reduce_tokens_below_limit(docs)\n",
"\n",
"\n",
"def load_artifacts(config):\n",
" faiss_artifact = wandb.use_artifact(config.faiss_artifact, type=\"search_index\")\n",
" faiss_artifact_dir = faiss_artifact.download()\n",
"\n",
" hyde_prompt_artifact = wandb.use_artifact(\n",
" config.hyde_prompt_artifact, type=\"prompt\"\n",
" )\n",
" hyde_artifact_dir = hyde_prompt_artifact.download()\n",
" hyde_prompt_file = f\"{hyde_artifact_dir}/hyde_prompt.txt\"\n",
"\n",
" chat_prompt_artifact = wandb.use_artifact(\n",
" config.chat_prompt_artifact, type=\"prompt\"\n",
" )\n",
" chat_artifact_dir = chat_prompt_artifact.download()\n",
" chat_prompt_file = f\"{chat_artifact_dir}/chat_prompt.txt\"\n",
"\n",
" return {\n",
" \"faiss\": faiss_artifact_dir,\n",
" \"hyde_prompt\": hyde_prompt_file,\n",
" \"chat_prompt\": chat_prompt_file,\n",
" }\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import json \n",
"def parse_source_documents(source_documents):\n",
" source_docs_dict = {}\n",
" for i,source_doc in enumerate(source_documents):\n",
" source_docs_dict[f\"source_doc_{i}\"] ={\n",
" \"page_content\":source_doc.page_content,\n",
" \"metadata\":source_doc.metadata[\"source\"],\n",
" \"lookup_index\":source_doc.lookup_index,\n",
" \"lookup_str\":source_doc.lookup_str,\n",
" } \n",
"\n",
" return json.dumps(source_docs_dict)"
]
},
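{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal usage sketch (hypothetical document, not from the original run) of the JSON string that `parse_source_documents` produces; real calls pass the `source_documents` returned by the retrieval chain:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build one hypothetical Document and serialize it with the helper above.\n",
"from langchain.docstore.document import Document\n",
"\n",
"demo_doc = Document(\n",
"    page_content=\"Weights & Biases is the machine learning platform for developers.\",\n",
"    metadata={\"source\": \"https://docs.wandb.ai/guides\"},\n",
")\n",
"parse_source_documents([demo_doc])"
]
},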
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import anthropic\n",
"\n",
"def claude_eval_prompt_constructor(question, source_documents, answer):\n",
" evaluation_prompts_template = f\"\"\"{anthropic.HUMAN_PROMPT}\n",
" As an experienced software quality assurance tester, you are evaluating the quality of the response from a\n",
" Weights & Biasses (aka wandb, W&B) support bot called wandbot. Supporting documentation is provided to help you\n",
" assess the quality of the response.\n",
" Your feedback should only only be \"POSITIVE\" or \"NEGATIVE\" to indicate whether the response is accurate or not,\n",
" no other information is required. For example:\n",
" {anthropic.HUMAN_PROMPT}\n",
" =====================\n",
" USER_QUESTION: What is wandb?\n",
" SUPPORTING_DOCUMENTATION: '{{\"source_doc_0\": {{\"page_content\": \"Weights & Biases is the machine learning platform for developers to build better models faster\", \"metadata\": \"https://docs.wandb.ai/guide\", \"lookup_index\": 0, \"lookup_str\": \"\"}}}}'\n",
" WANDBOT_RESPONSE: Weights & Biases is a machine learning platform for teams.\n",
" {anthropic.AI_PROMPT}\n",
" POSITIVE\n",
" {anthropic.HUMAN_PROMPT}\n",
" =====================\n",
" USER_QUESTION: How do I create a wandb sweep?\n",
" SUPPORTING_DOCUMENTATION: '{{\"source_doc_0\": {{\"page_content\": \"Use Weights & Biases Sweeps to automate hyperparameter search and explore the space of possible models. Create a sweep with a few lines of code.\", \"metadata\": \"https://docs.wandb.ai/guide\", \"lookup_index\": 0, \"lookup_str\": \"\"}}}}'\n",
" WANDBOT_RESPONSE: To create a W&B Articfact, you can use the wandb.Artifact class like so ```artifact = wandb.Artifact(name='bicycle-dataset', type='dataset')```\n",
" {anthropic.AI_PROMPT}\n",
" NEGATIVE\n",
" {anthropic.HUMAN_PROMPT}\n",
" =====================\n",
" USER_QUESTION: {question}\n",
" SUPPORTING_DOCUMENTATION: {source_documents}\n",
" WANDBOT_RESPONSE: {answer}\n",
" {anthropic.AI_PROMPT}\"\"\"\n",
" return evaluation_prompts_template\n",
"\n",
"question = \"what is wandb?\"\n",
"answer = \"Weights & Biases is a machine learning platform for teams.\"\n",
"# print(claude_eval_prompt_constructor(question, source_documents, answer))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# qa_chain.json"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI API key configured\n"
]
},
{
"data": {
"text/html": [
"Finishing last run (ID:pcdc2np8) before initializing another..."
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run <strong style=\"color:#cdcd00\">qa_test</strong> at: <a href='https://wandb.ai/wandbot/test/runs/pcdc2np8' target=\"_blank\">https://wandb.ai/wandbot/test/runs/pcdc2np8</a><br/>Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Find logs at: <code>./wandb/run-20230515_175016-pcdc2np8/logs</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Successfully finished last run (ID:pcdc2np8). Initializing new run:<br/>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4df5d973a3804488a455c410d68cfc13",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.0167188229165428, max=1.0))…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Tracking run with wandb version 0.15.2"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run data is saved locally in <code>/Users/morganmcguire/ML/wandb_examples/wandbot_synth/wandb/run-20230515_181845-02ey3wb1</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Syncing run <strong><a href='https://wandb.ai/wandbot/test/runs/02ey3wb1' target=\"_blank\">qa_test</a></strong> to <a href='https://wandb.ai/wandbot/test' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View project at <a href='https://wandb.ai/wandbot/test' target=\"_blank\">https://wandb.ai/wandbot/test</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run at <a href='https://wandb.ai/wandbot/test/runs/02ey3wb1' target=\"_blank\">https://wandb.ai/wandbot/test/runs/02ey3wb1</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mwandb\u001b[0m: Downloading large artifact faiss_store:latest, 246.06MB. 2 files... \n",
"\u001b[34m\u001b[1mwandb\u001b[0m: 2 of 2 files downloaded. \n",
"Done. 0:0:0.5\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: 1 of 1 files downloaded. \n",
"\u001b[34m\u001b[1mwandb\u001b[0m: 1 of 1 files downloaded. \n"
]
}
],
"source": [
"from types import SimpleNamespace\n",
"\n",
"# login to openai with your api key\n",
"get_openai_key()\n",
"\n",
"wandbot_config = SimpleNamespace(\n",
" faiss_artifact=\"parambharat/wandb_docs_bot/faiss_store:latest\",\n",
" hyde_prompt_artifact=\"parambharat/wandb_docs_bot/hyde_prompt:latest\",\n",
" chat_prompt_artifact=\"parambharat/wandb_docs_bot/system_prompt:latest\",\n",
" model_name=\"gpt-3.5-turbo\",\n",
")\n",
"\n",
"wandb.init(\n",
" name=\"qa_test\",\n",
" project=PROJECT,\n",
" entity=ENTITY,\n",
" config=wandbot_config,\n",
" )\n",
"\n",
"artifacts = load_artifacts(wandb.config)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"from wandb.sdk.lib.runid import generate_id\n",
"from wandb.integration.langchain import WandbTracer\n",
"from langchain.callbacks import get_openai_callback\n",
"\n",
"# LOAD DATA AND PROMPTS FROM ARTIFACTS\n",
"faiss_dir = artifacts[\"faiss\"]\n",
"hyde_prompt_template = open(artifacts[\"hyde_prompt\"]).read()\n",
"system_prompt_template = open(artifacts[\"chat_prompt\"]).read()\n",
"human_message_prompt_template = \"{question}\"\n",
"\n",
"\n",
"# SETUP Hypothetical Document Embedder (HyDE)\n",
"hyde_messages = [\n",
" SystemMessagePromptTemplate.from_template(hyde_prompt_template),\n",
" HumanMessagePromptTemplate.from_template(\"{question}\"),\n",
"]\n",
"hyde_prompt = ChatPromptTemplate.from_messages(hyde_messages)\n",
"\n",
"base_embeddings = OpenAIEmbeddings()\n",
"embeddings = HypotheticalDocumentEmbedder(\n",
" llm_chain=LLMChain(llm=ChatOpenAI(temperature=0.3), prompt=hyde_prompt),\n",
" base_embeddings=base_embeddings,\n",
" verbose=True\n",
")\n",
"\n",
"# LOAD CHAT PROMPT TEMPLATE\n",
"qa_messages = [\n",
" SystemMessagePromptTemplate.from_template(system_prompt_template),\n",
" HumanMessagePromptTemplate.from_template(human_message_prompt_template),\n",
"]\n",
"qa_prompt = ChatPromptTemplate.from_messages(qa_messages)\n",
"\n",
"# LOAD FAISS VECTOR STORE\n",
"vector_store = FAISSWithScore.load_local(faiss_dir, embeddings)\n",
"\n",
"# LOAD QA CHAIN\n",
"qa_chain = RetrievalQAWithSourcesChainWithScore.from_chain_type(\n",
" llm = ChatOpenAI(\n",
" model_name=wandb.config.model_name,\n",
" temperature=0,\n",
" request_timeout=20\n",
" ),\n",
" chain_type=\"stuff\",\n",
" retriever=vector_store.as_retriever(),\n",
" chain_type_kwargs={\"prompt\": qa_prompt},\n",
" return_source_documents=True,\n",
" verbose=True\n",
")\n",
"\n",
"# WANDB LOGGING CONFIG\n",
"wandb_config = {\"project\": PROJECT, \"entity\":ENTITY} # config for OpenAI autologger\n",
"table_cols = [\n",
" \"wandb_run_id\",\"query_id\", \"query\", \"wandbot_answer\", \"retrived_source_documents\",\n",
" \"synth_user_feedback_signal\", \"elapsed_time_s\", \"start_time\", \n",
" \"prompt_tokens\", \"completion_tokens\", \"total_tokens\",\n",
" \"total_cost_usd\", \"successful_requests\", \n",
" \"system_prompt_template\", \"human_message_prompt_template\", \"hyde_prompt_template\", \"eval_prompt_template\",\n",
" \"wandbot_model\", \"eval_model\"\n",
"]"
]
},
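{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Before looping over every synthetic question, a single-question smoke test (illustrative, not part of the original run) of the chain; it returns a dict with `answer` and `source_documents` keys, as used in the evaluation loop below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run the QA chain on one question and inspect the answer.\n",
"response = qa_chain({\"question\": \"What is wandb?\"})\n",
"print(response[\"answer\"])"
]
},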
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['What does the `wandb disabled` command in W&B do?',\n",
" 'How do I disable the W&B service using the `wandb disabled` option?',\n",
" 'Is it possible to disable specific features of W&B using the `wandb disabled` command?',\n",
" 'Can I use the `wandb disabled` command to temporarily pause W&B logging during a particular process?',\n",
" 'What happens to my previously logged data in W&B after I use the `wandb disabled` command?'],\n",
" dtype=object)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv('sythetic-user-questions_2023-05-14.csv')\n",
"questions = df[\"question\"].values\n",
"questions = questions[:5]\n",
"questions"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new RetrievalQAWithSourcesChainWithScore chain...\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"Question 0, Error occured: This table expects 18 columns: ['wandb_run_id', 'query_id', 'query', 'wandbot_answer', 'retrived_source_documents', 'system_prompt_template', 'human_message_prompt_template', 'hyde_prompt_template', 'synth_user_feedback_signal', 'elapsed_time_s', 'start_time', 'prompt_tokens', 'completion_tokens', 'total_tokens', 'total_cost_usd', 'successful_requests', 'wandbot_model', 'eval_model'], found 19\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Traceback (most recent call last):\n",
" File \"/var/folders/x6/tdg2_lyn14n9vxhrw5znyrtw0000gn/T/ipykernel_19604/2088620901.py\", line 57, in <module>\n",
" wandb_table.add_data(wandb.run.id, query_id, query, answer, source_documents,\n",
" File \"/Users/morganmcguire/mambaforge/envs/ml/lib/python3.11/site-packages/wandb/data_types.py\", line 464, in add_data\n",
" raise ValueError(\n",
"ValueError: This table expects 18 columns: ['wandb_run_id', 'query_id', 'query', 'wandbot_answer', 'retrived_source_documents', 'system_prompt_template', 'human_message_prompt_template', 'hyde_prompt_template', 'synth_user_feedback_signal', 'elapsed_time_s', 'start_time', 'prompt_tokens', 'completion_tokens', 'total_tokens', 'total_cost_usd', 'successful_requests', 'wandbot_model', 'eval_model'], found 19\n"
]
}
],
"source": [
"import traceback\n",
"\n",
"for i_q, query in enumerate(questions):\n",
"\n",
" # USER QUERY\n",
" # query = \"what is wandb?\"\n",
" # query = \" \".join(query.strip().split())\n",
" query_id = generate_id(length=16)\n",
"\n",
" # RUN CHAIN\n",
" try:\n",
"\n",
" start_time = time.time()\n",
" with get_openai_callback() as openai_cb:\n",
" response = qa_chain({\"question\": query}, \n",
" callbacks=[WandbTracer(wandb_config)],\n",
" return_only_outputs=False,\n",
" )\n",
" end_time = time.time()\n",
" elapsed_time = end_time - start_time\n",
" start_time = int(start_time) # convert to int for wandb table timestamp\n",
"\n",
" answer = response[\"answer\"]\n",
"\n",
" # RETRIEVED DOCUMENTS\n",
" source_docs = response[\"source_documents\"]\n",
" source_documents = parse_source_documents(source_docs)\n",
"\n",
" # TOKEN METRICS\n",
" prompts_tokens = openai_cb.prompt_tokens\n",
" completion_tokens = openai_cb.completion_tokens\n",
" total_tokens = openai_cb.total_tokens\n",
" total_cost = openai_cb.total_cost\n",
" successful_requests = openai_cb.successful_requests\n",
"\n",
" # USER FEEDBACK\n",
"\n",
" eval_model = \"claude-v1.3-100k\" # \"claude-v1\",\n",
" anthropic_api = \"XXX\"\n",
" client = anthropic.Client(api_key=anthropic_api)\n",
" max_tokens_to_sample = 100000\n",
"\n",
" eval_prompt_template = claude_eval_prompt_constructor(\"\", \"\", \"\") # Just to log the eval prompt template\n",
" eval_prompt = claude_eval_prompt_constructor(question, source_documents, answer)\n",
"\n",
" resp = client.completion(\n",
" prompt=eval_prompt,\n",
" stop_sequences=[anthropic.HUMAN_PROMPT],\n",
" model=eval_model,\n",
" max_tokens_to_sample=max_tokens_to_sample,\n",
" )\n",
"\n",
" synth_user_feedback = \"POSITIVE\" if \"positive\" in resp[\"completion\"].lower() else \"NEGATIVE\"\n",
"\n",
" # LOG TO WANDB\n",
" wandb_table = wandb.Table(table_cols)\n",
" wandb_table.add_data(wandb.run.id, query_id, query, answer, source_documents,\n",
" synth_user_feedback, elapsed_time, start_time, \n",
" prompts_tokens, completion_tokens, total_tokens,\n",
" total_cost, successful_requests, \n",
" system_prompt_template, human_message_prompt_template, hyde_prompt_template, eval_prompt_template,\n",
" wandb.config.model_name, eval_model)\n",
"\n",
" wandb.log({\"logs/synth_data_test_with_claude2\": wandb_table})\n",
" except Exception as e:\n",
" print(f\"Question {i_q}, Error occured: {e}\")\n",
" traceback.print_exc()\n",
" \n",
" break\n",
" if i_q % 10: print(i_q)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"DONE!\n"
]
}
],
"source": [
"print(\"DONE!\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}