Skip to content

Instantly share code, notes, and snippets.

@clbarnes
Created August 18, 2023 18:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save clbarnes/33b844bf516a28f9f0125fef5f4c8af8 to your computer and use it in GitHub Desktop.
Save clbarnes/33b844bf516a28f9f0125fef5f4c8af8 to your computer and use it in GitHub Desktop.
Async client for fetching HTML content from pages that require JavaScript execution, using a pool of browser tabs
from contextlib import asynccontextmanager
import asyncio as aio
from playwright.async_api import async_playwright
class BrowserPool:
def __init__(self, n_tabs=10, executable=None) -> None:
self.executable = executable
self.n_tabs = n_tabs
self.tabs_remaining = n_tabs
self.context = None
self.browser = None
self.tab_queue = aio.Queue()
def _check_active(self):
if self.browser is None:
raise RuntimeError("Pool is not active")
async def open(self):
if self.context is None:
self.context = async_playwright()
playwright = await self.context.__aenter__()
self.browser = await playwright.chromium.launch(executable_path=self.executable)
return self
async def close(self, *args, **kwargs):
if self.context is None:
return None
for _ in range(self.n_tabs - self.tabs_remaining):
await self.tab_queue.get()
await self.browser.close()
self.browser = None
ret = await self.context.__aexit__(*args, **kwargs)
self.context = None
return ret
async def __aenter__(self):
return await self.open()
async def __aexit__(self, *args, **kwargs):
return await self.close(*args, **kwargs)
@asynccontextmanager
async def tab(self):
self._check_active()
if self.tabs_remaining > 0:
try:
tab = self.tab_queue.get_nowait()
except aio.QueueEmpty:
self.tabs_remaining -= 1
tab = await self.browser.new_page()
else:
tab = await self.tab_queue.get()
try:
yield tab
finally:
await self.tab_queue.put(tab)
async def get(self, url: str, selector: str | None = None):
async with self.tab() as t:
await t.goto(url)
if selector is not None:
await t.wait_for_selector(selector, state="attached")
return await t.content()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment