Last active
August 23, 2023 08:00
-
-
Save barraponto/4b918fdc7d7191c1cd172f848feaeec5 to your computer and use it in GitHub Desktop.
Basic web scraper in Python that downloads card assets from Neonmob.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio
import logging
from functools import partial
from pathlib import Path

import aiometer
from aiocsv.readers import AsyncDictReader
from aiofiles import open
from aiohttp import ClientSession as Session, ClientTimeout
from yarl import URL
# Base URL handed to the client session; request paths are resolved against it.
HOST = "http://d1wwra234reihl.cloudfront.net"

# Headers sent with every request made through the shared session.
DEFAULT_HEADERS = {"Referer": "https://www.neonmob.com/"}

# Keyword arguments for the client session: attach the default headers, raise
# on HTTP error statuses, and set no overall time limit (total=None).
DEFAULT_SESSION_KWARGS = {
    "headers": DEFAULT_HEADERS,
    "raise_for_status": True,
    "timeout": ClientTimeout(total=None),
}

# Directory (relative to the working directory) that downloads are written to.
DEFAULT_PATH = "cartinhas"
async def download(session: Session, url_path: str, destination: str):
    """Fetch ``url_path`` through ``session`` and save the body to ``destination``.

    The response body is read completely before the destination file is
    opened, so a request that fails part-way through does not leave an empty
    file behind.

    Args:
        session: Client session used to issue the GET request.
        url_path: Path (or URL) handed to ``session.get``.
        destination: Filesystem path the response body is written to, in
            binary mode.

    Returns:
        The ``(url_path, destination)`` pair, unchanged.
    """
    async with session.get(url_path) as response:
        payload = await response.read()
    # Only touch the filesystem once the whole body has arrived.
    async with open(destination, "wb") as file:
        await file.write(payload)
    return url_path, destination
async def make_download(session: Session, card: dict):
    """Download the asset of a single card, logging failures instead of raising.

    The request path is the path component of ``card["asset"]``; the file is
    stored under ``DEFAULT_PATH`` as ``<card id><original suffix>``.

    Args:
        session: Client session forwarded to ``download``.
        card: Mapping with at least the ``"asset"`` and ``"id"`` keys.

    Returns:
        Whatever ``download`` returns on success, or ``None`` when the
        download raised (the exception is logged with its traceback).
    """
    url_path = URL(card["asset"]).path
    suffix = Path(url_path).suffix
    try:
        destination = f"{DEFAULT_PATH}/{card['id']}{suffix}"
        outcome = await download(session, url_path, destination)
    except Exception:
        logging.exception(f"Download failed for {url_path}.")
        return None
    else:
        return outcome
async def main():
    """Download every card listed in ``./remaining.csv``.

    Rows are read as dictionaries, then fetched concurrently through a single
    client session, limited to 8 downloads in flight and 4 new downloads per
    second. Failed downloads yield a falsy result and are skipped here.
    """
    async with Session(HOST, **DEFAULT_SESSION_KWARGS) as session:
        async with open("./remaining.csv") as cards_file:
            # AsyncDictReader takes its field names from the first row by
            # itself once iteration starts. Advancing the reader by hand
            # beforehand would consume (and drop) the first card, and would
            # raise on an empty file, so iterate it directly.
            cards = [card async for card in AsyncDictReader(cards_file)]

        # The rows are fully materialized, so the CSV file is already closed
        # while the downloads run.
        async with aiometer.amap(
            partial(make_download, session),
            cards,
            max_at_once=8,
            max_per_second=4,
        ) as results:
            async for result in results:
                if not result:
                    continue
                path, destination = result
                logging.debug(f"Succesfully downloaded {path} to {destination}.")
# Start the downloads only when this file is executed as a script, so that
# importing the module does not kick off network and disk activity.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment