@404Wolf
Created November 25, 2022 19:19
import aiohttp
import logging
import asyncio
from typing import List

logger = logging.getLogger(__name__)


async def isAlive(proxy: str, session: aiohttp.ClientSession, timeout=5, retries=2) -> bool:
    """
    Check if a proxy is alive.

    Args:
        proxy (str): the proxy to check (full url including http://)
        session (aiohttp.ClientSession): the session to make requests through
        timeout (int): how many seconds max ping to proxy
        retries (int): how many times to retry request upon os or timeout error

    Returns:
        bool: True if proxy is alive, False if proxy is dead
    """
    timeout = aiohttp.ClientTimeout(total=timeout)
    # attempt to reach the proxy up to `retries` times; if all attempts fail the proxy is dead
    for i in range(retries):
        try:
            # any response that comes back through the proxy means it is reachable
            async with session.get("http://example.com", timeout=timeout, proxy=proxy):
                logger.info(f"{proxy} is alive.")
                return True
        # break = no hope left, the proxy is dead
        # continue = it is possible it was a fluke, so retry
        except aiohttp.client_exceptions.ServerDisconnectedError:
            break
        except aiohttp.client_exceptions.ClientHttpProxyError:
            break
        except aiohttp.client_exceptions.TooManyRedirects:
            break
        except aiohttp.client_exceptions.ClientOSError:
            continue
        except asyncio.exceptions.TimeoutError:
            continue
    logger.info(f"{proxy} is dead.")
    return False


async def proxies(timeout=5) -> List[str]:
    """
    Retrieve a list of alive free proxies.

    Args:
        timeout (int): how many seconds max ping to each proxy

    Returns:
        list: list of alive proxies in format "http://ip:port"
    """
    # fetch the list of free proxies
    async with aiohttp.ClientSession() as session:  # client session used for proxy scanning
        async with session.get(
            "https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all"
        ) as resp:
            # convert the line-separated string into a list of proxies
            proxies = await resp.text()
            proxies = proxies.strip().split("\n")
            proxies = [f"http://{proxy.strip()}" for proxy in proxies]
            logger.debug(f"{len(proxies)} proxies found. Filtering out dead proxies...")

        # create proxy checker tasks, pausing every 100 tasks so they are not all launched at once
        tasks = []
        for counter, proxy in enumerate(proxies):
            tasks.append(asyncio.create_task(isAlive(proxy, session, timeout=timeout)))
            if counter % 100 == 0:
                await asyncio.wait(tasks)
        results = await asyncio.gather(*tasks)

        # keep only the proxies whose check came back alive
        proxies = [proxy for proxy, alive in zip(proxies, results) if alive]

        # return output list of alive proxies
        logger.info(f"Fetched {len(proxies)} alive proxies.")
        logger.debug("\n".join(proxies))
        return proxies
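
A minimal sketch of how these helpers might be driven from a script, assuming the gist is saved as an importable module; the module name free_proxies is only an illustration, not part of the gist:

# example_usage.py -- a minimal sketch; the module name `free_proxies` is an assumption
import asyncio
import logging

import free_proxies  # the gist above saved as free_proxies.py


async def main() -> None:
    # fetch the free proxy list and filter it, allowing 5 seconds per proxy check
    alive = await free_proxies.proxies(timeout=5)
    print(f"{len(alive)} proxies responded, e.g. {alive[:3]}")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())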