Skip to content

Instantly share code, notes, and snippets.

@jayzhan211
Created November 18, 2018 03:42
Show Gist options
  • Save jayzhan211/2aa2b381dff32c8b87d84873453f5c37 to your computer and use it in GitHub Desktop.
import requests
import re
import os
import sys
import time
from bs4 import BeautifulSoup
# Scrape wenku8.net: walk the article catalogue, and for every book download
# each chapter's text into <load_path>/<book title>/<chapter title>/<chapter title>.txt.
load_path = r'C:\Users\owner\Wenku8'

# NOTE(review): the Cookie header embeds a logged-in session (user id and
# password hash). It will expire, and should not be published in a gist.
headers = {
'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
'Cookie':'Hm_lvt_d72896ddbf8d27c750e3b365ea2fc902=1542500172,1542500185; UM_distinctid=167242d0e1d578-0a974a88240399-3a3a5c0e-1fa400-167242d0e1e4; jieqiUserCharset=big5; PHPSESSID=cd1k29vc75laat22jq0p7sooca705gd8; jieqiUserInfo=jieqiUserId%3D416755%2CjieqiUserName%3Djiburiru%2CjieqiUserGroup%3D3%2CjieqiUserVip%3D0%2CjieqiUserPassword%3D1d95f663dfa0ce76562ee8ca0ea90089%2CjieqiUserName_un%3Djiburiru%2CjieqiUserHonor_un%3D%26%23x65B0%3B%26%23x624B%3B%26%23x4E0A%3B%26%23x8DEF%3B%2CjieqiUserGroupName_un%3D%26%23x666E%3B%26%23x901A%3B%26%23x4F1A%3B%26%23x5458%3B%2CjieqiUserLogin%3D1542505405; jieqiVisitInfo=jieqiUserLogin%3D1542505405%2CjieqiUserId%3D416755; CNZZDATA1309966=cnzz_eid%3D2004785943-1542499396-%26ntime%3D1542501320; Hm_lpvt_d72896ddbf8d27c750e3b365ea2fc902=1542505409; CNZZDATA1259916661=2097782824-1542499949-%7C1542501702'
}

catalogue_url = 'https://www.wenku8.net/modules/article/articlelist.php?class='
resp = requests.get(url=catalogue_url, headers=headers)
resp.encoding = 'big5'  # the site serves Big5-encoded pages
soup = BeautifulSoup(resp.text, 'lxml')

# Each book entry sits in a fixed-width floated div in the catalogue page.
for div in soup.find_all('div', {'style': 'width:95px;float:left;'}):
    for a in div.find_all('a'):
        title = a.get('title')
        book_href = a.get('href')
        # Keep only the digits of the href, then drop the first digit
        # (original behaviour — presumably a site numbering quirk; TODO confirm).
        # Renamed from `id`, which shadowed the builtin.
        book_id = re.sub('[^0-9]', '', book_href)[1:]
        book_url = 'https://www.wenku8.net/modules/article/reader.php?aid=' + str(book_id)
        print('{}: {}'.format(title, book_url))

        # Fixed: exist_ok=True replaces the race-prone exists()+makedirs()
        # pair; full paths replace os.chdir(), which mutated global state
        # mid-loop and made relative writes fragile.
        book_dir = os.path.join(load_path, title)
        os.makedirs(book_dir, exist_ok=True)

        resp2 = requests.get(url=book_url, headers=headers)
        resp2.encoding = 'big5'
        book_soup = BeautifulSoup(resp2.text, 'lxml')
        # print(book_soup)

        # Chapter links live in <td class="ccss"> cells of the reader page.
        for td in book_soup.find_all('td', {'class': 'ccss'}):
            for chap in td.find_all('a'):
                print(chap)
                # NOTE(review): hrefs here may be relative to the reader
                # page; if downloads 404, join against book_url — verify.
                chap_url = chap.get('href')
                title2 = chap.text
                print('{}: {}'.format(title2, chap_url))

                chap_dir = os.path.join(book_dir, title2)
                os.makedirs(chap_dir, exist_ok=True)

                resp3 = requests.get(url=chap_url, headers=headers)
                resp3.encoding = 'big5'
                chap_soup = BeautifulSoup(resp3.text, 'lxml')
                for line in chap_soup.find_all('div', {'id': 'content'}):
                    print(line.text)
                    # Fixed: write UTF-8 directly. The original round-tripped
                    # through sys.stdin.encoding, which is None when stdin is
                    # not a terminal (AttributeError) and lossy otherwise.
                    out_path = os.path.join(chap_dir, title2 + '.txt')
                    with open(out_path, 'w', encoding='utf-8') as f:
                        f.write(line.text)
                    print('OK!!!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment