Skip to content

Instantly share code, notes, and snippets.

@danyx23
Last active March 22, 2023 18:26
Show Gist options
  • Save danyx23/25d027df3ba186fea3dc9755cd9dd554 to your computer and use it in GitHub Desktop.
Save danyx23/25d027df3ba186fea3dc9755cd9dd554 to your computer and use it in GitHub Desktop.
Test GPT for answering questions about a specific variable
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from requests import get"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://ourworldindata.org/grapher/data/variables/data/539022.json\"\n",
"data = get(url).json()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"metadata_url = \"https://ourworldindata.org/grapher/data/variables/metadata/539022.json\"\n",
"metadata = get(metadata_url).json()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"entity_names = pd.DataFrame.from_records(metadata[\"dimensions\"][\"entities\"][\"values\"])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['values', 'years', 'entities'])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.keys()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(data)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>values</th>\n",
" <th>years</th>\n",
" <th>entities</th>\n",
" <th>name</th>\n",
" <th>code</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>33.94</td>\n",
" <td>1543</td>\n",
" <td>1</td>\n",
" <td>United Kingdom</td>\n",
" <td>GBR</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>38.82</td>\n",
" <td>1548</td>\n",
" <td>1</td>\n",
" <td>United Kingdom</td>\n",
" <td>GBR</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>39.59</td>\n",
" <td>1553</td>\n",
" <td>1</td>\n",
" <td>United Kingdom</td>\n",
" <td>GBR</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>22.38</td>\n",
" <td>1558</td>\n",
" <td>1</td>\n",
" <td>United Kingdom</td>\n",
" <td>GBR</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>36.66</td>\n",
" <td>1563</td>\n",
" <td>1</td>\n",
" <td>United Kingdom</td>\n",
" <td>GBR</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20444</th>\n",
" <td>77.00</td>\n",
" <td>2021</td>\n",
" <td>166</td>\n",
" <td>Costa Rica</td>\n",
" <td>CRI</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20445</th>\n",
" <td>59.20</td>\n",
" <td>2021</td>\n",
" <td>167</td>\n",
" <td>Democratic Republic of Congo</td>\n",
" <td>COD</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20446</th>\n",
" <td>63.50</td>\n",
" <td>2021</td>\n",
" <td>168</td>\n",
" <td>Congo</td>\n",
" <td>COG</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20447</th>\n",
" <td>63.40</td>\n",
" <td>2021</td>\n",
" <td>169</td>\n",
" <td>Comoros</td>\n",
" <td>COM</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20448</th>\n",
" <td>72.80</td>\n",
" <td>2021</td>\n",
" <td>170</td>\n",
" <td>Colombia</td>\n",
" <td>COL</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>20449 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" values years entities name code\n",
"0 33.94 1543 1 United Kingdom GBR\n",
"1 38.82 1548 1 United Kingdom GBR\n",
"2 39.59 1553 1 United Kingdom GBR\n",
"3 22.38 1558 1 United Kingdom GBR\n",
"4 36.66 1563 1 United Kingdom GBR\n",
"... ... ... ... ... ...\n",
"20444 77.00 2021 166 Costa Rica CRI\n",
"20445 59.20 2021 167 Democratic Republic of Congo COD\n",
"20446 63.50 2021 168 Congo COG\n",
"20447 63.40 2021 169 Comoros COM\n",
"20448 72.80 2021 170 Colombia COL\n",
"\n",
"[20449 rows x 5 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_with_entity_names = df.join(entity_names.set_index(\"id\"), on=\"entities\")\n",
"df_with_entity_names"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"df_for_query = df_with_entity_names[[\"name\", \"years\", \"values\"]]\n",
"df_for_query.columns = [\"country\", \"year\", metadata[\"name\"]]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import create_pandas_dataframe_agent\n",
"from langchain.llms import OpenAI"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test getting intermediate steps\n",
"\n",
"Intermediate steps are printed by default, which is fine for a Jupyter notebook, but in a different context you often want to intercept them instead. Supposedly, `return_intermediate_steps` should give us a response that has an additional key, alongside input and output, containing the intermediate step descriptions."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"agent = create_pandas_dataframe_agent(OpenAI(temperature=0), df_for_query, verbose=True, return_intermediate_steps=True)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many rows are in the dataframe\n",
"Action: python_repl_ast\n",
"Action Input: len(df)\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m20449\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: There are 20449 rows in the dataframe.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': 'how many rows are there?',\n",
" 'output': 'There are 20449 rows in the dataframe.'}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response = agent({\"input\": \"how many rows are there?\"})\n",
"response"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['input', 'output'])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.keys()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Doesn't look like this works: the response only has the `input` and `output` keys, with no intermediate steps.\n",
"\n",
"Let's just ask a few more questions"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to find the row in the dataframe with the lowest life expectancy for Ghana\n",
"Action: python_repl_ast\n",
"Action Input: df[df['country'] == 'Ghana']['Life expectancy at birth (historical)'].min()\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m28.0\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: Ghana had its lowest life expectancy in 28.0.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'Ghana had its lowest life expectancy in 28.0.'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\"When did Ghana have it's lowest life expectancy?\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to find the year with the lowest life expectancy in Ghana\n",
"Action: python_repl_ast\n",
"Action Input: df[df['country'] == 'Ghana']['year'].min()\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m1921\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: 1921\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'1921'"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\"In which year did Ghana have it's lowest life expectancy?\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to find the years for which Ghana has data\n",
"Action: python_repl_ast\n",
"Action Input: df[df['country'] == 'Ghana']['year'].min(), df[df['country'] == 'Ghana']['year'].max()\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m(1921, 2021)\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the earliest and latest years that Ghana has data for\n",
"Final Answer: The earliest year that Ghana has data for is 1921 and the latest year is 2021.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'The earliest year that Ghana has data for is 1921 and the latest year is 2021.'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\"What are the earliest and latest years that Ghana has data for?\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to find the maximum value in the dataframe\n",
"Action: python_repl_ast\n",
"Action Input: df['Life expectancy at birth (historical)'].max()\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m86.5\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: The country with the highest life expectancy is not specified in the dataframe, but the highest life expectancy is 86.5.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'The country with the highest life expectancy is not specified in the dataframe, but the highest life expectancy is 86.5.'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\"Which country has the highest life expectancy?\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to find the row with the highest life expectancy\n",
"Action: python_repl_ast\n",
"Action Input: df.loc[df['Life expectancy at birth (historical)'].idxmax()]['country']\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mMonaco\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: Monaco\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'Monaco'"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\"What is the name of the country that has the highest life expectancy?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "jupyter-bx2KX8rc",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment