arthur-tomsjj/nhentai crawler

## nhentai crawler
import os
import urllib.request as req
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


# Browser-like User-Agent sent with every HTTP request; the site is fetched
# with this header for both the HTML pages and the image files.
Header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'}

# Characters that Windows refuses in file and directory names.
_INVALID_NAME_CHARS = '\\/:*?"<>|'

# Seconds to wait on a single HTTP request before giving up.
_TIMEOUT = 30


def sanitize_name(name):
    """Return *name* made safe for use as a file or directory name.

    Removes every character Windows forbids in names (the original script
    only removed ``?``), trims surrounding whitespace and trailing dots, and
    falls back to ``'untitled'`` when nothing is left.
    """
    cleaned = name.translate(str.maketrans('', '', _INVALID_NAME_CHARS))
    # Windows silently rejects names that end with a space or a dot.
    cleaned = cleaned.strip().rstrip('. ')
    return cleaned or 'untitled'


def build_page_url(url, index):
    """Return the reader-page URL for page *index* of the gallery at *url*.

    Works whether or not *url* was typed with a trailing slash.
    """
    return url.rstrip('/') + '/' + str(index) + '/'


def resolve_image_url(page_link, src):
    """Return the absolute image URL for a ``src`` attribute found on *page_link*.

    Absolute URLs are returned unchanged; relative and protocol-relative
    ones are resolved against the page they came from, so any mirror
    domain works without a hard-coded host.
    """
    return urljoin(page_link, src)


def fetch_page(link):
    """Download *link* and return it parsed as a BeautifulSoup document."""
    request = req.Request(link, headers=Header)
    with req.urlopen(request, timeout=_TIMEOUT) as response:
        html = response.read().decode('utf-8')
    return BeautifulSoup(html, 'html.parser')


def gallery_title(page):
    """Return the gallery title text from a parsed gallery page.

    The title is read from ``<div id="info">``: the ``<h2>`` is preferred
    and ``<h1>`` is used when no ``<h2>`` exists. The site changes this
    markup often, so this is the first place to check when the script breaks.

    Raises:
        ValueError: if no title heading can be located.
    """
    info = page.find('div', id='info')
    heading = None
    if info is not None:
        heading = info.h2
        if heading is None:
            heading = info.h1
    if heading is None:
        raise ValueError("gallery title not found (page layout may have changed)")
    return heading.text


def save_image(img_url, destination):
    """Download *img_url* and write its bytes to the file *destination*.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            an error page is never saved under an image file name.
    """
    response = requests.get(img_url, headers=Header, timeout=_TIMEOUT)
    response.raise_for_status()
    with open(destination, 'wb') as f:
        f.write(response.content)


def download_gallery(url, file_path):
    """Download every page image of the gallery at *url* into *file_path*.

    A sub-directory named after the gallery title is created (together with
    any missing parents) and one file per page is written into it.

    Raises:
        ValueError: if the expected markup is missing from a page.
        OSError: on network or file-system failures.
    """
    page = fetch_page(url)
    book_name = sanitize_name(gallery_title(page))

    path = os.path.join(file_path, book_name)
    os.makedirs(path, exist_ok=True)

    # One thumbnail container is rendered per page of the gallery.
    pagenum = len(page.find_all('div', class_='thumb-container'))

    print("下載項目: "+book_name)
    print("總頁數: %d \n"%pagenum)
    print("開始執行下載:")
    for index in range(1, pagenum+1):
        link = build_page_url(url, index)
        target = fetch_page(link).find('section', id='image-container')
        image = target.find('img') if target is not None else None
        if image is None or not image.get('src'):
            raise ValueError("image not found on page %d" % index)

        img = resolve_image_url(link, image['src'])
        # Drop any query string before taking the last path segment as name.
        file_name = sanitize_name(img.split('?')[0].split('/')[-1])
        save_image(img, os.path.join(path, file_name))
        print("downloaded: "+file_name)


def main():
    """Prompt for a target directory, then download galleries until told to stop.

    An empty URL (or end-of-input) ends the session. A failure while
    downloading one gallery is reported and the prompt is shown again.
    """
    file_path = input("輸入本子要存放的路徑:")
    while True:
        print()
        try:
            url = input("請輸入nhentai本子的網址:").strip()
        except EOFError:
            break
        if not url:
            break
        try:
            download_gallery(url, file_path)
        except (OSError, ValueError) as err:
            # urllib and requests network errors derive from OSError; bad
            # URLs and unexpected markup surface as ValueError.
            print("下載失敗: %s" % err)
            continue
        print("\n下載完成......")


if __name__ == "__main__":
    main()
	# The statements in this region have lost their nesting: each line carries
	# one leading tab, so the bodies of `while`/`with`/`for`/`if` are not
	# indented under their headers and Python cannot parse the region as written.
	# NOTE(review): looks like a flattened second copy of the crawler script --
	# confirm, then re-indent or remove.
	import urllib.request as req
	import requests
	from bs4 import BeautifulSoup
	import os


	# Directory under which one sub-directory per gallery is created.
	file_path = input("輸入本子要存放的路徑:")

	# User-Agent header passed to the urllib requests below.
	Header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'}
	while(1):
	print()
	url = input("請輸入nhentai本子的網址:")
	Nhentai = req.Request(url,headers = Header)

	with req.urlopen(Nhentai) as response:
	data = response.read().decode('utf-8')

	page = BeautifulSoup(data,'html.parser')

	title = page.find('div',id = 'info')
	book_name = title.h2.text # gallery title; the site changes this markup often -- if the script breaks, check here first
	temp = ''
	for _str in book_name: # drop every '?' character (presumably because it is not allowed in directory names -- confirm)
	if _str != '?':
	temp += _str
	book_name = temp

	path = file_path+'\\'+book_name
	if not os.path.isdir(path):
	os.mkdir(path)

	# The number of thumbnail containers is used as the page count.
	All_page = page.find_all('div',class_ = 'thumb-container')
	pagenum = len(All_page)

	print("下載項目: "+book_name)
	print("總頁數: %d \n"%pagenum)
	print("開始執行下載:")
	for index in range(1,pagenum+1):
	# Page URL is the gallery URL with the 1-based page number appended.
	link = url+str(index)+'/'
	Nhentai = req.Request(link,headers = Header)

	with req.urlopen(Nhentai) as response:
	data = response.read().decode('utf-8')

	page = BeautifulSoup(data,'html.parser')
	target = page.find('section',id = 'image-container')

	# A src without "http" is prefixed with the site host taken from the URL typed by the user.
	if target.a.img['src'].find("http") == -1:
	if url.find('nhentai.to') != -1:
	img = "https://nhentai.to"+target.a.img['src']
	else:
	img = "https://nhentai.net"+target.a.img['src']
	else :
	img = target.a.img['src']
	download = requests.get(img)
	# The file is named after the last path segment of the image URL.
	fname = path+'\\'+img.split('/')[-1]

	with open(fname,'wb') as f:
	f.write(download.content)
	f.close()
	print("downloaded: "+img.split('/')[-1])

	del data
	del download
	del page
	print("\n下載完成......")