import asyncio
import logging
from typing import List

import aiohttp

logger = logging.getLogger(__name__)
async def isAlive(proxy: str, session: aiohttp.ClientSession, timeout: int = 5, retries: int = 2) -> bool:
    """
    Check if a proxy is alive.

    Args:
        proxy (str): the proxy to check (full url including http://)
        session (aiohttp.ClientSession): the session to make the test request with
        timeout (int): max number of seconds to wait for the proxy to respond
        retries (int): how many times to retry the request after an OS or timeout error

    Returns:
        bool: True if the proxy is alive, False if the proxy is dead
    """
    timeout = aiohttp.ClientTimeout(total=timeout)
    # attempt to reach the proxy up to `retries` times; if every attempt fails,
    # report the proxy as dead
    for _ in range(retries):
        try:
            async with session.get("http://example.com", timeout=timeout, proxy=proxy):
                logger.info(f"{proxy} is alive.")
                return True
        # break = no hope left, the proxy is dead
        # continue = it may have been a fluke, so retry
        except (
            aiohttp.ServerDisconnectedError,
            aiohttp.ClientHttpProxyError,
            aiohttp.TooManyRedirects,
        ):
            break
        except (aiohttp.ClientOSError, asyncio.TimeoutError):
            continue
    logger.info(f"{proxy} is dead.")
    return False
async def proxies(timeout: int = 5) -> List[str]:
    """
    Retrieve a list of alive free proxies.

    Args:
        timeout (int): max number of seconds to wait for each proxy to respond

    Returns:
        list: list of alive proxies in the format "http://ip:port"
    """
    # fetch a list of free proxies
    async with aiohttp.ClientSession() as session:  # create client session for proxy scanning
        async with session.get(
            "https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all"
        ) as resp:
            # convert the newline-separated response into a list of proxy urls
            proxies = await resp.text()
            proxies = proxies.strip().split("\n")
            proxies = [f"http://{proxy.strip()}" for proxy in proxies]
        logger.debug(f"{len(proxies)} proxies found. Filtering out dead proxies...")
        # create proxy checker tasks, pausing every 100 tasks so the number of
        # concurrent checks stays bounded
        tasks = []
        for counter, proxy in enumerate(proxies):
            tasks.append(asyncio.create_task(isAlive(proxy, session, timeout=timeout)))
            if counter % 100 == 0:
                await asyncio.wait(tasks)
        results = await asyncio.gather(*tasks)
        # keep only the proxies whose liveness check succeeded; filtering with zip
        # avoids mutating the list while iterating over it
        proxies = [proxy for proxy, alive in zip(proxies, results) if alive]
    # return the output list of alive proxies
    logger.info(f"Fetched {len(proxies)} alive proxies.")
    logger.debug("\n".join(proxies))
    return proxies
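A minimal usage sketch, assuming the gist is saved as proxies.py (a hypothetical filename): it fetches the free proxy list, filters out the dead entries, and prints how many survived the liveness check.

import asyncio
import logging

from proxies import proxies  # hypothetical module name for this gist file

logging.basicConfig(level=logging.INFO)

async def main() -> None:
    # fetch and filter the free proxy list, then report the survivors
    alive = await proxies(timeout=5)
    print(f"{len(alive)} alive proxies, e.g. {alive[0] if alive else 'none'}")

asyncio.run(main())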