danyx23/owid-gpt-data-test.ipynb

## owid-gpt-data-test.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from requests import get"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = \"https://ourworldindata.org/grapher/data/variables/data/539022.json\"\n",
    "data = get(url).json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata_url = \"https://ourworldindata.org/grapher/data/variables/metadata/539022.json\"\n",
    "metadata = get(metadata_url).json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "entity_names = pd.DataFrame.from_records(metadata[\"dimensions\"][\"entities\"][\"values\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['values', 'years', 'entities'])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>values</th>\n",
       "      <th>years</th>\n",
       "      <th>entities</th>\n",
       "      <th>name</th>\n",
       "      <th>code</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>33.94</td>\n",
       "      <td>1543</td>\n",
       "      <td>1</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>GBR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>38.82</td>\n",
       "      <td>1548</td>\n",
       "      <td>1</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>GBR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>39.59</td>\n",
       "      <td>1553</td>\n",
       "      <td>1</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>GBR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>22.38</td>\n",
       "      <td>1558</td>\n",
       "      <td>1</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>GBR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>36.66</td>\n",
       "      <td>1563</td>\n",
       "      <td>1</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>GBR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20444</th>\n",
       "      <td>77.00</td>\n",
       "      <td>2021</td>\n",
       "      <td>166</td>\n",
       "      <td>Costa Rica</td>\n",
       "      <td>CRI</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20445</th>\n",
       "      <td>59.20</td>\n",
       "      <td>2021</td>\n",
       "      <td>167</td>\n",
       "      <td>Democratic Republic of Congo</td>\n",
       "      <td>COD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20446</th>\n",
       "      <td>63.50</td>\n",
       "      <td>2021</td>\n",
       "      <td>168</td>\n",
       "      <td>Congo</td>\n",
       "      <td>COG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20447</th>\n",
       "      <td>63.40</td>\n",
       "      <td>2021</td>\n",
       "      <td>169</td>\n",
       "      <td>Comoros</td>\n",
       "      <td>COM</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20448</th>\n",
       "      <td>72.80</td>\n",
       "      <td>2021</td>\n",
       "      <td>170</td>\n",
       "      <td>Colombia</td>\n",
       "      <td>COL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20449 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       values  years  entities                          name code\n",
       "0       33.94   1543         1                United Kingdom  GBR\n",
       "1       38.82   1548         1                United Kingdom  GBR\n",
       "2       39.59   1553         1                United Kingdom  GBR\n",
       "3       22.38   1558         1                United Kingdom  GBR\n",
       "4       36.66   1563         1                United Kingdom  GBR\n",
       "...       ...    ...       ...                           ...  ...\n",
       "20444   77.00   2021       166                    Costa Rica  CRI\n",
       "20445   59.20   2021       167  Democratic Republic of Congo  COD\n",
       "20446   63.50   2021       168                         Congo  COG\n",
       "20447   63.40   2021       169                       Comoros  COM\n",
       "20448   72.80   2021       170                      Colombia  COL\n",
       "\n",
       "[20449 rows x 5 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_with_entity_names = df.join(entity_names.set_index(\"id\"), on=\"entities\")\n",
    "df_with_entity_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_for_query = df_with_entity_names[[\"name\", \"years\", \"values\"]]\n",
    "df_for_query.columns = [\"country\", \"year\", metadata[\"name\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.agents import create_pandas_dataframe_agent\n",
    "from langchain.llms import OpenAI"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test getting intermediate steps\n",
    "\n",
    "Intermediate steps are printed by default, which is fine for a Jupyter notebook, but in a different context you often want to intercept them instead. Supposedly, `return_intermediate_steps` should give us a response that has an additional key, alongside `input` and `output`, that contains the intermediate step descriptions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "agent = create_pandas_dataframe_agent(OpenAI(temperature=0), df_for_query, verbose=True, return_intermediate_steps=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to find out how many rows are in the dataframe\n",
      "Action: python_repl_ast\n",
      "Action Input: len(df)\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m20449\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: There are 20449 rows in the dataframe.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'input': 'how many rows are there?',\n",
       " 'output': 'There are 20449 rows in the dataframe.'}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response = agent({\"input\": \"how many rows are there?\"})\n",
    "response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['input', 'output'])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response.keys()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Doesn't look like this works...\n",
    "\n",
    "Let's just ask a few more questions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to find the row in the dataframe with the lowest life expectancy for Ghana\n",
      "Action: python_repl_ast\n",
      "Action Input: df[df['country'] == 'Ghana']['Life expectancy at birth (historical)'].min()\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m28.0\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: Ghana had its lowest life expectancy in 28.0.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Ghana had its lowest life expectancy in 28.0.'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agent.run(\"When did Ghana have it's lowest life expectancy?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to find the year with the lowest life expectancy in Ghana\n",
      "Action: python_repl_ast\n",
      "Action Input: df[df['country'] == 'Ghana']['year'].min()\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m1921\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: 1921\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'1921'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agent.run(\"In which year did Ghana have it's lowest life expectancy?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to find the years for which Ghana has data\n",
      "Action: python_repl_ast\n",
      "Action Input: df[df['country'] == 'Ghana']['year'].min(), df[df['country'] == 'Ghana']['year'].max()\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m(1921, 2021)\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the earliest and latest years that Ghana has data for\n",
      "Final Answer: The earliest year that Ghana has data for is 1921 and the latest year is 2021.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'The earliest year that Ghana has data for is 1921 and the latest year is 2021.'"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agent.run(\"What are the earliest and latest years that Ghana has data for?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to find the maximum value in the dataframe\n",
      "Action: python_repl_ast\n",
      "Action Input: df['Life expectancy at birth (historical)'].max()\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m86.5\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: The country with the highest life expectancy is not specified in the dataframe, but the highest life expectancy is 86.5.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'The country with the highest life expectancy is not specified in the dataframe, but the highest life expectancy is 86.5.'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agent.run(\"Which country has the highest life expectancy?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to find the row with the highest life expectancy\n",
      "Action: python_repl_ast\n",
      "Action Input: df.loc[df['Life expectancy at birth (historical)'].idxmax()]['country']\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3mMonaco\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: Monaco\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Monaco'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agent.run(\"What is the name of the country that has the highest life expectancy?\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "jupyter-bx2KX8rc",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"from requests import get"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"url = \"https://ourworldindata.org/grapher/data/variables/data/539022.json\"\n",
	"data = get(url).json()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"metadata_url = \"https://ourworldindata.org/grapher/data/variables/metadata/539022.json\"\n",
	"metadata = get(metadata_url).json()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"entity_names = pd.DataFrame.from_records(metadata[\"dimensions\"][\"entities\"][\"values\"])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"dict_keys(['values', 'years', 'entities'])"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"data.keys()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"df = pd.DataFrame(data)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>values</th>\n",
	" <th>years</th>\n",
	" <th>entities</th>\n",
	" <th>name</th>\n",
	" <th>code</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>33.94</td>\n",
	" <td>1543</td>\n",
	" <td>1</td>\n",
	" <td>United Kingdom</td>\n",
	" <td>GBR</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>38.82</td>\n",
	" <td>1548</td>\n",
	" <td>1</td>\n",
	" <td>United Kingdom</td>\n",
	" <td>GBR</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>39.59</td>\n",
	" <td>1553</td>\n",
	" <td>1</td>\n",
	" <td>United Kingdom</td>\n",
	" <td>GBR</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>22.38</td>\n",
	" <td>1558</td>\n",
	" <td>1</td>\n",
	" <td>United Kingdom</td>\n",
	" <td>GBR</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>36.66</td>\n",
	" <td>1563</td>\n",
	" <td>1</td>\n",
	" <td>United Kingdom</td>\n",
	" <td>GBR</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>...</th>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>20444</th>\n",
	" <td>77.00</td>\n",
	" <td>2021</td>\n",
	" <td>166</td>\n",
	" <td>Costa Rica</td>\n",
	" <td>CRI</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>20445</th>\n",
	" <td>59.20</td>\n",
	" <td>2021</td>\n",
	" <td>167</td>\n",
	" <td>Democratic Republic of Congo</td>\n",
	" <td>COD</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>20446</th>\n",
	" <td>63.50</td>\n",
	" <td>2021</td>\n",
	" <td>168</td>\n",
	" <td>Congo</td>\n",
	" <td>COG</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>20447</th>\n",
	" <td>63.40</td>\n",
	" <td>2021</td>\n",
	" <td>169</td>\n",
	" <td>Comoros</td>\n",
	" <td>COM</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>20448</th>\n",
	" <td>72.80</td>\n",
	" <td>2021</td>\n",
	" <td>170</td>\n",
	" <td>Colombia</td>\n",
	" <td>COL</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"<p>20449 rows × 5 columns</p>\n",
	"</div>"
	],
	"text/plain": [
	" values years entities name code\n",
	"0 33.94 1543 1 United Kingdom GBR\n",
	"1 38.82 1548 1 United Kingdom GBR\n",
	"2 39.59 1553 1 United Kingdom GBR\n",
	"3 22.38 1558 1 United Kingdom GBR\n",
	"4 36.66 1563 1 United Kingdom GBR\n",
	"... ... ... ... ... ...\n",
	"20444 77.00 2021 166 Costa Rica CRI\n",
	"20445 59.20 2021 167 Democratic Republic of Congo COD\n",
	"20446 63.50 2021 168 Congo COG\n",
	"20447 63.40 2021 169 Comoros COM\n",
	"20448 72.80 2021 170 Colombia COL\n",
	"\n",
	"[20449 rows x 5 columns]"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df_with_entity_names = df.join(entity_names.set_index(\"id\"), on=\"entities\")\n",
	"df_with_entity_names"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"df_for_query = df_with_entity_names[[\"name\", \"years\", \"values\"]]\n",
	"df_for_query.columns = [\"country\", \"year\", metadata[\"name\"]]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"from langchain.agents import create_pandas_dataframe_agent\n",
	"from langchain.llms import OpenAI"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Test getting intermediate steps\n",
	"\n",
	"Intermediate steps are printed by default, which is fine for a Jupyter notebook, but in a different context you often want to intercept them instead. Supposedly, `return_intermediate_steps` should give us a response that has an additional key, alongside `input` and `output`, that contains the intermediate step descriptions."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"agent = create_pandas_dataframe_agent(OpenAI(temperature=0), df_for_query, verbose=True, return_intermediate_steps=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n",
	"\n",
	"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
	"\u001b[32;1m\u001b[1;3mThought: I need to find out how many rows are in the dataframe\n",
	"Action: python_repl_ast\n",
	"Action Input: len(df)\u001b[0m\n",
	"Observation: \u001b[36;1m\u001b[1;3m20449\u001b[0m\n",
	"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
	"Final Answer: There are 20449 rows in the dataframe.\u001b[0m\n",
	"\n",
	"\u001b[1m> Finished chain.\u001b[0m\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"{'input': 'how many rows are there?',\n",
	" 'output': 'There are 20449 rows in the dataframe.'}"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"response = agent({\"input\": \"how many rows are there?\"})\n",
	"response"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"dict_keys(['input', 'output'])"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"response.keys()"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Doesn't look like this works...\n",
	"\n",
	"Let's just ask a few more questions"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n",
	"\n",
	"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
	"\u001b[32;1m\u001b[1;3mThought: I need to find the row in the dataframe with the lowest life expectancy for Ghana\n",
	"Action: python_repl_ast\n",
	"Action Input: df[df['country'] == 'Ghana']['Life expectancy at birth (historical)'].min()\u001b[0m\n",
	"Observation: \u001b[36;1m\u001b[1;3m28.0\u001b[0m\n",
	"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
	"Final Answer: Ghana had its lowest life expectancy in 28.0.\u001b[0m\n",
	"\n",
	"\u001b[1m> Finished chain.\u001b[0m\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"'Ghana had its lowest life expectancy in 28.0.'"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"agent.run(\"When did Ghana have it's lowest life expectancy?\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n",
	"\n",
	"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
	"\u001b[32;1m\u001b[1;3mThought: I need to find the year with the lowest life expectancy in Ghana\n",
	"Action: python_repl_ast\n",
	"Action Input: df[df['country'] == 'Ghana']['year'].min()\u001b[0m\n",
	"Observation: \u001b[36;1m\u001b[1;3m1921\u001b[0m\n",
	"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
	"Final Answer: 1921\u001b[0m\n",
	"\n",
	"\u001b[1m> Finished chain.\u001b[0m\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"'1921'"
	]
	},
	"execution_count": 18,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"agent.run(\"In which year did Ghana have it's lowest life expectancy?\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n",
	"\n",
	"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
	"\u001b[32;1m\u001b[1;3mThought: I need to find the years for which Ghana has data\n",
	"Action: python_repl_ast\n",
	"Action Input: df[df['country'] == 'Ghana']['year'].min(), df[df['country'] == 'Ghana']['year'].max()\u001b[0m\n",
	"Observation: \u001b[36;1m\u001b[1;3m(1921, 2021)\u001b[0m\n",
	"Thought:\u001b[32;1m\u001b[1;3m I now know the earliest and latest years that Ghana has data for\n",
	"Final Answer: The earliest year that Ghana has data for is 1921 and the latest year is 2021.\u001b[0m\n",
	"\n",
	"\u001b[1m> Finished chain.\u001b[0m\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"'The earliest year that Ghana has data for is 1921 and the latest year is 2021.'"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"agent.run(\"What are the earliest and latest years that Ghana has data for?\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n",
	"\n",
	"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
	"\u001b[32;1m\u001b[1;3mThought: I need to find the maximum value in the dataframe\n",
	"Action: python_repl_ast\n",
	"Action Input: df['Life expectancy at birth (historical)'].max()\u001b[0m\n",
	"Observation: \u001b[36;1m\u001b[1;3m86.5\u001b[0m\n",
	"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
	"Final Answer: The country with the highest life expectancy is not specified in the dataframe, but the highest life expectancy is 86.5.\u001b[0m\n",
	"\n",
	"\u001b[1m> Finished chain.\u001b[0m\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"'The country with the highest life expectancy is not specified in the dataframe, but the highest life expectancy is 86.5.'"
	]
	},
	"execution_count": 16,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"agent.run(\"Which country has the highest life expectancy?\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"\n",
	"\n",
	"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
	"\u001b[32;1m\u001b[1;3mThought: I need to find the row with the highest life expectancy\n",
	"Action: python_repl_ast\n",
	"Action Input: df.loc[df['Life expectancy at birth (historical)'].idxmax()]['country']\u001b[0m\n",
	"Observation: \u001b[36;1m\u001b[1;3mMonaco\u001b[0m\n",
	"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
	"Final Answer: Monaco\u001b[0m\n",
	"\n",
	"\u001b[1m> Finished chain.\u001b[0m\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"'Monaco'"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"agent.run(\"What is the name of the country that has the highest life expectancy?\")"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "jupyter-bx2KX8rc",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.11.2"
	},
	"orig_nbformat": 4
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}