-
-
Save jkotra/a249e04f8896badf8c53c2840527bc6b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Wikipedia Agent\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "This notebook demonstrates building an LLM agent with **Llama 3 70B** (powered by Groq). The objective is to create a self-navigating LLM agent that extracts the latest information from Wikipedia for a given query.\n", | |
| "\n", | |
| "If you have any questions or queries, you can reach out to me at [<jagadeesh.kotra@technoidentity.com>](mailto:jagadeesh.kotra@technoidentity.com)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Install dependencies.\n", | |
| "# pandas and colorama are imported by later cells, so they are installed here\n", | |
| "# as well; otherwise a fresh kernel fails on those imports.\n", | |
| "\n", | |
| "%pip install requests groq Wikipedia-API pandas colorama" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Define tools and utility functions" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import re\n", | |
| "import requests\n", | |
| "import wikipediaapi\n", | |
| "import pandas as pd\n", | |
| "\n", | |
| "def extract_function_and_args(text: str):\n", | |
| "    \"\"\"Find the first `name(arg, ...)` call in the model output.\n", | |
| "\n", | |
| "    Returns a `(function_name, arguments)` tuple, or `None` when the text\n", | |
| "    contains no call. Arguments are split on commas and the match ends at the\n", | |
| "    first `)`, so an argument that itself contains a comma or a closing\n", | |
| "    parenthesis is not supported.\n", | |
| "    \"\"\"\n", | |
| "    match = re.search(r'([a-zA-Z_]+\\w*)\\((.*?)\\)', text)\n", | |
| "    if match is None:\n", | |
| "        return None\n", | |
| "\n", | |
| "    function_name, raw_arguments = match.groups()\n", | |
| "    # Strip only the surrounding quotes (single or double), so apostrophes\n", | |
| "    # inside an argument survive and double-quoted arguments are accepted.\n", | |
| "    arguments = [arg.strip().strip(\"'\").strip('\"') for arg in raw_arguments.split(',')]\n", | |
| "    return function_name, arguments\n", | |
| "\n", | |
| "def search_wikipedia_api(query: str, n=2):\n", | |
| "    \"\"\"Search English Wikipedia and return the first `n` raw search hits.\n", | |
| "\n", | |
| "    Each hit is an entry of the `query.search` list in the JSON response.\n", | |
| "    Raises `requests.HTTPError` when the API answers with an error status.\n", | |
| "    \"\"\"\n", | |
| "    # Pass the query through `params` so that spaces, `&`, `#` and similar\n", | |
| "    # characters are URL-encoded instead of corrupting the query string.\n", | |
| "    response = requests.get(\n", | |
| "        \"https://en.wikipedia.org/w/api.php\",\n", | |
| "        params={\n", | |
| "            \"action\": \"query\",\n", | |
| "            \"list\": \"search\",\n", | |
| "            \"srsearch\": query,\n", | |
| "            \"utf8\": \"\",\n", | |
| "            \"format\": \"json\",\n", | |
| "        },\n", | |
| "        timeout=10,  # do not hang the agent loop on a stalled connection\n", | |
| "    )\n", | |
| "    response.raise_for_status()\n", | |
| "    return response.json()[\"query\"][\"search\"][:n]\n", | |
| "\n", | |
| "def extract_summary(title: str):\n", | |
| "    \"\"\"Return the `summary` attribute of the Wikipedia page named `title`.\"\"\"\n", | |
| "    return wikipediaapi.Wikipedia(user_agent=\"TI_agent_demo\").page(title).summary\n", | |
| "\n", | |
| "def wikipedia_summary(search_term: str):\n", | |
| "    \"\"\"Build a list of `{title, summary}` dicts for the search hits of `search_term`.\"\"\"\n", | |
| "    return [\n", | |
| "        {\"title\": hit['title'], \"summary\": extract_summary(hit['title'])}\n", | |
| "        for hit in search_wikipedia_api(search_term)\n", | |
| "    ]\n", | |
| " \n", | |
| "def format_summary(summaries: list) -> str:\n", | |
| "    \"\"\"Render each summary dict as a `Title:` / `Summary:` pair, separated by blank lines.\"\"\"\n", | |
| "    rendered = []\n", | |
| "    for entry in summaries:\n", | |
| "        rendered.append(f\"Title: {entry['title']}\\nSummary: {entry['summary']}\")\n", | |
| "    return \"\\n\\n\".join(rendered)\n", | |
| "\n", | |
| "def format_sections(data: list[str]):\n", | |
| "    \"\"\"Render every entry of `data` as a `-entry` line and concatenate the lines.\"\"\"\n", | |
| "    return \"\".join(f\"-{name}\\n\" for name in data)\n", | |
| "\n", | |
| "def get_wikipedia_page_sections(title: str):\n", | |
| "    \"\"\"Return the titles found in `page.sections` for `title`, as the string form of a list.\"\"\"\n", | |
| "    section_titles = []\n", | |
| "    for section in wikipediaapi.Wikipedia(user_agent=\"TI_agent_demo\").page(title).sections:\n", | |
| "        section_titles.append(section.title)\n", | |
| "    return str(section_titles)\n", | |
| "\n", | |
| "def get_wikipedia_section_content(title: str, section: str):\n", | |
| "    \"\"\"Return `full_text(level=10)` of the section named `section` on page `title`.\"\"\"\n", | |
| "    wiki = wikipediaapi.Wikipedia(\n", | |
| "        user_agent=\"TI_agent_demo\",\n", | |
| "        extract_format=wikipediaapi.ExtractFormat.WIKI,\n", | |
| "    )\n", | |
| "    target_section = wiki.page(title).section_by_title(section)\n", | |
| "    return target_section.full_text(level=10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import datetime\n", | |
| "\n", | |
| "# System prompt for the agent. It lists the tools by the exact names the agent\n", | |
| "# loop dispatches on, so the names here must stay in sync with that loop.\n", | |
| "# The timestamp is evaluated once, when this cell runs. Do not chain `.now()`\n", | |
| "# onto `datetime.now(datetime.UTC)`: it is a classmethod and would return\n", | |
| "# naive local time instead of UTC.\n", | |
| "WIKI_SEARCH_SYSTEM_PROMPT = f\"\"\"\n", | |
| "You are an AI with access to latest information via tools. You have access to the following tools:\n", | |
| "- wikipedia_summary\n", | |
| "    - help: get summary of various wikipedia pages (top 2) based on the search term.\n", | |
| "    - usage: wikipedia_summary('search term')\n", | |
| "- wikipedia_page_sections\n", | |
| "    - help: get all the sections in a wikipedia page\n", | |
| "    - usage: wikipedia_page_sections('title of the page')\n", | |
| "- wikipedia_section\n", | |
| "    - help: get content of a specific section in a wikipedia page. this will contain contect of the requested section and all subsections under it.\n", | |
| "    - usage: wikipedia_section('title of the page', 'name of the section')\n", | |
| "\n", | |
| "current system time (UTC): {datetime.datetime.now(datetime.UTC)}\n", | |
| "based on the given question, call the appropriate tool with relevant arguments. The result will be provided back to you, You have ability to call the tool multiple times until you have relevant information to form a final answer. Only answer the question from context.\n", | |
| "use `wikipedia_summary` tool to get summary of wiki pages. of these, use `wikipedia_page_sections` tool to get page sections by providing page title as arg. use `wikipedia_section` to extract content from required section. \n", | |
| "\n", | |
| "Follow the guidelines carefully:\n", | |
| "- Your output must always contain a function call unless its your final answer.\n", | |
| "- a call to a function must be enclosed within backticks.\n", | |
| "- DO NOT to search based on your own knowledge cutoff timeline. always assume tools will provide latest information.\n", | |
| "\"\"\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from groq import Groq\n", | |
| "\n", | |
| "import os\n", | |
| "\n", | |
| "# SECURITY: never hardcode the API key in the notebook. It is read from the\n", | |
| "# GROQ_API_KEY environment variable, and a missing variable fails fast with a\n", | |
| "# KeyError. Any key that was ever committed to a shared notebook must be\n", | |
| "# treated as leaked and revoked.\n", | |
| "client = Groq(api_key=os.environ[\"GROQ_API_KEY\"])\n", | |
| "\n", | |
| "def ask_llama3(sys_prompt: str, agent_messages: list[dict]) -> str:\n", | |
| "    \"\"\"Send the system prompt plus the conversation so far to the model and return its reply.\n", | |
| "\n", | |
| "    `agent_messages` is a list of `{\"role\": ..., \"content\": ...}` dicts; it is\n", | |
| "    placed after the system message. The completion is requested as a stream\n", | |
| "    and the streamed chunks are concatenated into a single string.\n", | |
| "    \"\"\"\n", | |
| "    messages = [{\"role\": \"system\", \"content\": sys_prompt}, *agent_messages]\n", | |
| "    completion = client.chat.completions.create(\n", | |
| "        model=\"llama3-70b-8192\",\n", | |
| "        messages=messages,\n", | |
| "        temperature=1,\n", | |
| "        max_tokens=8096,\n", | |
| "        top_p=1,\n", | |
| "        stream=True,\n", | |
| "        stop=None,\n", | |
| "    )\n", | |
| "    # `or \"\"` guards against chunks whose delta carries no content.\n", | |
| "    return \"\".join(chunk.choices[0].delta.content or \"\" for chunk in completion)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Query: >> who won 2024 loksabha elections?\n", | |
| "\u001b[31m `wikipedia_summary('2024 Indian general election')` \u001b[39m\n", | |
| "extracted function call >>> wikipedia_summary ['2024 Indian general election']\n", | |
| "Title: 2024 Indian general election\n", | |
| "Summary: General elections were held in India from 19 April to 1 June 2024 in seven phases, to elect all 543 members of the Lok Sabha. Votes were counted and the result was declared on 4 June to form the 18th Lok Sabha. The legislative assembly elections in the states of Andhra Pradesh, Arunachal Pradesh, Odisha, and Sikkim were held simultaneously with the general election, along with the by-elections for 25 constituencies in 12 legislative assemblies.\n", | |
| "More than 968 million people out of a population of 1.4 billion people were eligible to vote, equivalent to 70 percent of the total population. 642 million voters participated in the election and 312 million of them were women, making it the highest ever participation by women voters. This was the largest-ever election in history, surpassing the previous election, and lasted 44 days, second only to the 1951–52 Indian general election. \n", | |
| "Incumbent prime minister Narendra Modi, who completed a second term, ran for a third consecutive term after his Bharatiya Janata Party (BJP) had enjoyed an absolute majority—a minimum of 272 seats—in the 2019 and 2014 elections. The primary opposition was the Indian National Developmental Inclusive Alliance (INDIA), a coalition formed by the Indian National Congress (INC) and many regional parties in 2023. The election was criticised for lack of action on hate speeches by Modi's BJP, reported Electronic Voting Machine (EVM) malfunctioning, and alleged suppression of political opponents of the BJP. \n", | |
| "Pre-polling and post-polling opinion surveys from mainstream media outlets predicted a decisive victory for the BJP and its coalition, the National Democratic Alliance (NDA). The BJP won 240 seats, down from the 303 seats it had secured in 2019, and lost its singular majority in the Lok Sabha, whereas the overall NDA secured 293 of the house's 543 seats. The INDIA coalition outperformed expectations, securing 234 seats, 99 of which were won by the Congress, garnering the party the Official Opposition status. By 5 June, Modi confirmed the support of 303 MPs, including the NDA MPs, three MPs from minor parties, and ten other independent MPs to Droupadi Murmu, the 15th President of India. This marked his third term as Prime Minister and his first term heading a coalition government, relying mainly on the support of two NDA allies: the Telugu Desam Party of Andhra Pradesh and Janata Dal (United) of Bihar.\n", | |
| "\n", | |
| "Title: 2024 Indian general election in Punjab\n", | |
| "Summary: The 2024 Indian general election in Punjab was held on 1 June 2024 to elect 13 members of 18th Lok Sabha.\n", | |
| "\u001b[31m `wikipedia_page_sections('2024 Indian general election')` \u001b[39m\n", | |
| "extracted function call >>> wikipedia_page_sections ['2024 Indian general election']\n", | |
| "['Background', 'Schedule', 'Parties and alliances', 'Candidates', 'Issues', 'Party campaigns', 'Surveys and polls', 'Voting', 'Misinformation', 'Voter turnout', 'Results', 'Aftermath', 'See also', 'Notes', 'References', 'External links']\n", | |
| "\u001b[31m `wikipedia_section('2024 Indian general election', 'Results')` \u001b[39m\n", | |
| "extracted function call >>> wikipedia_section ['2024 Indian general election', 'Results']\n", | |
| "Results\n", | |
| "Following the first round, the BJP won its first seat after Mukesh Dalal, its candidate for Surat constituency in Gujarat, was elected unopposed following rejection and withdrawal of other candidates. No voting was held in the constituency, as the ECI had certified the results two weeks prior due to the absence of rival candidates.\n", | |
| "\n", | |
| "،By alliance and party\n", | |
| "By state or union territory\n", | |
| "\n", | |
| "If you have enough information to answer the query, present your final answer.\n", | |
| "\u001b[31m The BJP won 240 seats, and the overall NDA secured 293 of the house's 543 seats. The INDIA coalition outperformed expectations, securing 234 seats, 99 of which were won by the Congress. \u001b[39m\n", | |
| "\u001b[46m Final Answer: \u001b[49m \u001b[32m The BJP won 240 seats, and the overall NDA secured 293 of the house's 543 seats. The INDIA coalition outperformed expectations, securing 234 seats, 99 of which were won by the Congress. \u001b[39m\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "from colorama import Fore, Back\n", | |
| "\n", | |
| "\n", | |
| "USER_QUERY = input(\"Ask a Question:\")\n", | |
| "print(f\"Query: >> {USER_QUERY}\")\n", | |
| "\n", | |
| "# Conversation history that is re-sent to the model on every turn.\n", | |
| "agent_messages = [\n", | |
| "    {\n", | |
| "        \"role\": \"user\",\n", | |
| "        \"content\": USER_QUERY\n", | |
| "    }\n", | |
| "]\n", | |
| "\n", | |
| "# Cap on tool-calling rounds so a confused model cannot loop forever.\n", | |
| "MAX_ITERATIONS = 10\n", | |
| "\n", | |
| "# track current iteration.\n", | |
| "iterations = 0\n", | |
| "\n", | |
| "while True:\n", | |
| "    output = ask_llama3(WIKI_SEARCH_SYSTEM_PROMPT, agent_messages)\n", | |
| "    print(Fore.RED, output, Fore.RESET)\n", | |
| "    agent_messages.append({\"role\": \"assistant\", \"content\": output})\n", | |
| "\n", | |
| "    # A reply without any function call is the model's final answer.\n", | |
| "    parsed_call = extract_function_and_args(output)\n", | |
| "    if parsed_call is None:\n", | |
| "        print(Back.CYAN, \"Final Answer:\", Back.RESET, Fore.GREEN, output, Fore.RESET)\n", | |
| "        break\n", | |
| "\n", | |
| "    f, args = parsed_call\n", | |
| "    print(\"extracted function call >>>\", f, args)\n", | |
| "    try:\n", | |
| "        match f:\n", | |
| "            case \"wikipedia_summary\":\n", | |
| "                f_result = format_summary(wikipedia_summary(args[0]))\n", | |
| "            # Also accept `wikipedia_page`, a shorter name the model may emit for this tool.\n", | |
| "            case \"wikipedia_page_sections\" | \"wikipedia_page\":\n", | |
| "                f_result = get_wikipedia_page_sections(args[0])\n", | |
| "            case \"wikipedia_section\":\n", | |
| "                try:\n", | |
| "                    f_result = get_wikipedia_section_content(args[0], args[1])\n", | |
| "                    f_result += \"\\nIf you have enough information to answer the query, present your final answer.\"\n", | |
| "                except Exception:\n", | |
| "                    f_result = \"section does not exist! please choose only from the available sections.\"\n", | |
| "            case _:\n", | |
| "                # Without this branch `f_result` would be unbound (or stale from the previous round).\n", | |
| "                f_result = f\"unknown tool '{f}'! available tools: wikipedia_summary, wikipedia_page_sections, wikipedia_section.\"\n", | |
| "    except Exception as e:\n", | |
| "        # Report tool failures (network errors, missing arguments) back to the\n", | |
| "        # model instead of mistaking them for a final answer.\n", | |
| "        f_result = f\"tool call failed: {e!r}. please try again with different arguments.\"\n", | |
| "    print(f_result)\n", | |
| "\n", | |
| "    agent_messages.append({\"role\": \"user\", \"content\": f_result})\n", | |
| "    iterations += 1\n", | |
| "    if iterations > MAX_ITERATIONS:\n", | |
| "        print(\"Stopped: iteration limit reached without a final answer.\")\n", | |
| "        break" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment