Created
October 3, 2022 20:45
-
-
Save itssoap/b8a5aff2944352ff9ef515ffde0817f1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup as bs | |
import httpx | |
import requests | |
import aiohttp | |
import asyncio | |
import time | |
from pathlib import Path | |
from slowpoke import downloader | |
from warnings import warn | |
# Gallery id to download (the numeric id in the nhentai.to gallery URL).
nuke = "402543"
# Base gallery URL; per-page URLs are built by appending "<page_number>/".
url = f"https://nhentai.to/g/{nuke}/"
# Browser-like request headers so the site serves the normal HTML pages.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Referer': 'https://nhentai.to',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}
def page_count(soup):
    """Return the page count parsed from the gallery's info block.

    Scans the children of the ``<div id="info">`` element for the first tag
    whose text mentions "page" (e.g. "25 pages") and returns the leading
    token as a string (e.g. "25").

    Returns None when the info div is missing or no child mentions "page"
    (the original raised UnboundLocalError in that case).
    """
    info = soup.find('div', {'id': 'info'})
    if info is None:
        return None
    for child in info.findChildren():
        if "page" in child.text:
            # "25 pages" -> "25"
            return child.text.split(" ")[0]
    return None
async def grabber(count):
    """Fetch gallery pages 1..count and return the reader image URLs.

    Each page lives at f"{url}{page_number}/". The original awaited each
    request one at a time; here the requests are issued concurrently with
    asyncio.gather, preserving page order in the result.

    count: page count as int or numeric string (as returned by page_count).
    Returns a list of image src URLs, one per page, in page order.
    """
    async def _fetch(session, page_no):
        async with session.get(f"{url}{page_no}/", headers=headers) as resp:
            return await resp.read()

    async with aiohttp.ClientSession() as session:
        # gather keeps results in submission order, so images stay in page order
        pages = await asyncio.gather(
            *(_fetch(session, i) for i in range(1, int(count) + 1))
        )

    images = []
    for html in pages:
        soup = bs(html, "html.parser")
        # the main reader image on each page carries class "fit-horizontal"
        images.append(soup.find('img', {'class': 'fit-horizontal'}).get('src'))
    return images
async def downloader(images):
    """Download every image URL into a folder named after the gallery id.

    Creates ./{nuke}/ (warning instead of failing if it already exists) and
    saves each image under its URL basename. Downloads run concurrently via
    asyncio.gather instead of one at a time.

    NOTE(review): this local definition shadows the `downloader` imported
    from slowpoke at the top of the file — presumably intentional, but worth
    confirming.
    """
    path = Path(f"{nuke}")
    try:
        path.mkdir(parents=True)
    except FileExistsError:
        warn(f'Folder "{path}" already exists, skipping creation')

    async def _save(session, image_url):
        async with session.get(image_url, headers=headers) as resp:
            data = await resp.read()
        # URL basename is the filename, e.g. .../3.jpg -> 3.jpg
        (path / image_url.split('/')[-1]).write_bytes(data)

    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*(_save(session, u) for u in images))
def main():
    """Drive the scrape: fetch the gallery page, count its pages, resolve
    every image URL, then download them all."""
    response = requests.get(url, headers=headers)
    gallery_soup = bs(response.text, "html.parser")
    total_pages = page_count(gallery_soup)
    image_urls = asyncio.run(grabber(total_pages))
    asyncio.run(downloader(image_urls))
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment