arthur-tomsjj/ehentai crawler test

## ehentai crawler test
#######   e-hentai本子 爬蟲測試  #######

import urllib.request as req
import requests
from bs4 import BeautifulSoup
import os
import time

url = 'https://e-hentai.org/s/42331c9fc6/1467036-1'
Header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'}
pagenum = 96    #本子總頁數
count = 1       #計算目前存到第幾頁

def crawl(url,count):
    source = req.Request(url,headers = Header)
    with req.urlopen(source) as response:
        data = response.read().decode('utf-8')

    page = BeautifulSoup(data,"html.parser")

    div = page.find('div',id='i3')        #找出div id為i3

    Next_link = div.a['href']             #抓div中的a標籤 提取裡面href的路徑
    img = div.a.img['src']                #抓div中的a中的img標籤 提取裡面src的路徑
    fname = '爬蟲img/'+str(count)+'.jpg'  #檔案儲存路徑+名稱
    download = requests.get(img)

    with open(fname,'wb') as f:
        f.write(download.content)
        f.close()
        del download
    print('downloaded: '+fname.split('/')[-1])

    if(count<pagenum):
        count+=1
        del source
        crawl(Next_link,count)

#開始抓爬
crawl(url,count)
	####### e-hentai本子爬蟲測試 #######

	import urllib.request as req
	import requests
	from bs4 import BeautifulSoup
	import os
	import time

	url = 'https://e-hentai.org/s/42331c9fc6/1467036-1'
	Header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'}
	pagenum = 96 #本子總頁數
	count = 1 #計算目前存到第幾頁

	def crawl(url,count):
	source = req.Request(url,headers = Header)
	with req.urlopen(source) as response:
	data = response.read().decode('utf-8')

	page = BeautifulSoup(data,"html.parser")

	div = page.find('div',id='i3') #找出div id為i3

	Next_link = div.a['href'] #抓div中的a標籤提取裡面href的路徑
	img = div.a.img['src'] #抓div中的a中的img標籤提取裡面src的路徑
	fname = '爬蟲img/'+str(count)+'.jpg' #檔案儲存路徑+名稱
	download = requests.get(img)

	with open(fname,'wb') as f:
	f.write(download.content)
	f.close()
	del download
	print('downloaded: '+fname.split('/')[-1])

	if(count<pagenum):
	count+=1
	del source
	crawl(Next_link,count)

	#開始抓爬
	crawl(url,count)