Skip to content

Instantly share code, notes, and snippets.

@bowmanjd
Created August 14, 2020 22:29
Show Gist options
  • Save bowmanjd/71da19ad4544e9dea0e8b165eda71c4b to your computer and use it in GitHub Desktop.
Save bowmanjd/71da19ad4544e9dea0e8b165eda71c4b to your computer and use it in GitHub Desktop.
"""Proof-of-concept asynchronous Wikipedia search tool."""
import asyncio
import logging
import time
import httpx
# Contact identifier interpolated into the User-Agent header value below.
EMAIL = "your_email@provider" # or GitHub URL or other identifier
# Header mapping passed as ``headers=`` when list_articles builds its client.
USER_AGENT = {"user-agent": f"pypedia/0.1.0 ({EMAIL})"}
# Root logging goes to asyncpedia.log at INFO level; filemode "w" means the
# file is truncated each time this module is loaded.
logging.basicConfig(filename="asyncpedia.log", filemode="w", level=logging.INFO)
# NOTE(review): "asyncio" is also the logger name the asyncio library itself
# uses, so this module's messages share that logger -- confirm this is intended.
LOG = logging.getLogger("asyncio")
async def search(query, limit=100, client=None):
    """Run one Wikipedia page search and return the HTTP response.

    Args:
        query: Search string sent as the ``q`` parameter.
        limit: Value sent as the ``limit`` parameter.
        client: Optional ``httpx.AsyncClient`` to reuse. When omitted, a
            temporary client is created for this call and closed afterwards.

    Returns:
        The response object from ``client.get``; callers decode it
        themselves (e.g. ``response.json()["pages"]``).
    """
    owns_client = client is None
    if owns_client:
        # Send the same identifying headers that list_articles uses, so a
        # standalone call is not anonymous.
        client = httpx.AsyncClient(headers=USER_AGENT)

    LOG.info("Start query '%s': %s", query, time.strftime("%X"))
    url = "https://en.wikipedia.org/w/rest.php/v1/search/page"
    params = {"q": query, "limit": limit}
    try:
        response = await client.get(url, params=params)
    finally:
        # Close only a client we created, and do so even if the request
        # raised, so the connection pool is never leaked.
        if owns_client:
            await client.aclose()
    LOG.info("End query '%s': %s", query, time.strftime("%X"))
    return response
async def list_articles(queries):
    """Execute several Wikipedia searches concurrently.

    Args:
        queries: Iterable of search strings. It is materialized once, so
            one-shot iterables such as generators are supported.

    Returns:
        A dict mapping each query to the ``"pages"`` entry of its decoded
        JSON response. Duplicate queries collapse to a single key.
    """
    # The queries are needed twice (to start the searches and to key the
    # result), so snapshot them up front instead of re-iterating the input.
    terms = list(queries)
    async with httpx.AsyncClient(headers=USER_AGENT) as client:
        responses = await asyncio.gather(
            *(search(term, client=client) for term in terms)
        )
    return {
        term: response.json()["pages"]
        for term, response in zip(terms, responses)
    }
def run():
    """Run a fixed set of sample searches and print every hit.

    For each query a ``*** query ***`` header is printed, followed by one
    ``title: excerpt`` line per returned article.
    """
    search_terms = [
        "linksto:Python_(programming_language)",
        "incategory:Computer_programming",
        "incategory:Programming_languages",
        "incategory:Python_(programming_language)",
        "incategory:Python_web_frameworks",
        "incategory:Python_implementations",
        "incategory:Programming_languages_created_in_1991",
        "incategory:Computer_programming_stubs",
    ]
    found = asyncio.run(list_articles(search_terms))
    for query in found:
        print(f"\n*** {query} ***")
        for article in found[query]:
            title = article["title"]
            excerpt = article["excerpt"]
            print(f"{title}: {excerpt}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment