-
-
Save anonymous/c95fd30a078b74e83e32 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*- coding:utf-8 -*- | |
import requests | |
import subprocess | |
import os | |
import json | |
import time | |
from bs4 import BeautifulSoup | |
# Root working directory for relative paths (unused in the visible code — TODO confirm before removing).
WorkSpace='.'
# CDN prefix prepended to the per-page image paths extracted from the reader script.
ComicCDNHost='http://images.dmzj.com/'
def Log(String):
    """Print *String* prefixed with the current local time (12-hour HH:MM:SS)."""
    # Parenthesized print of a single argument behaves identically on
    # Python 2 and 3, unlike the original bare print statement.
    print(time.strftime('%I:%M:%S', time.localtime(time.time())) + ' - ' + String)
def xxxMakeFileSystemReady():
    """Create the _DataBase working directory if it does not already exist.

    Bug fix: the original called the undefined name ``system`` (only ``os``
    is imported, so it raised NameError) and relied on the Windows-only
    ``md`` shell command. This portable form mirrors the mkdir pattern
    used by Downloader.
    """
    try:
        os.mkdir('_DataBase')
    except OSError:
        pass  # directory already exists
def xxxGenerateUID(URL):
    """Extract the comic UID (slug) from a www.dmzj.com URL.

    Supports both page styles:
      http://www.dmzj.com/info/zhenlizhitu.html       -> 'zhenlizhitu'
      http://www.dmzj.com/view/zhenlizhitu/37273.html -> 'zhenlizhitu'

    Returns the string 'Error!' for unrecognized URLs (kept for backward
    compatibility with existing callers).
    """
    if URL.find('http://www.dmzj.com/') == -1:
        return 'Error!'
    if URL.find('/info/') != -1:
        return URL.split('/info/')[1].split('.')[0]
    if URL.find('/view/') != -1:
        return URL.split('/view/')[1].split('/')[0]
    # Bug fix: the original assumed '/view/' was present whenever '/info/'
    # was absent and raised IndexError on any other dmzj.com URL.
    return 'Error!'
def xxxCreatURL(UID):
    """Build the info-page URL for a comic UID.

    Example: 'zhenlizhitu' -> 'http://www.dmzj.com/info/zhenlizhitu.html'
    """
    return 'http://www.dmzj.com/info/{0}.html'.format(UID)
def GetCover(URL):
    """Fetch the comic info page, print its cover image URL, and return it.

    Scrapes the <div class="comic_i_img"><a><img src=...></a></div> element.
    Raises AttributeError if the page has no such element.
    """
    Page = requests.get(URL).content
    # Explicit parser: omitting it makes bs4 emit a warning and pick whatever
    # parser happens to be installed, which can change the scrape result.
    PageSoup = BeautifulSoup(Page, 'html.parser')
    PageSoup = PageSoup.find('div', {'class': 'comic_i_img'})
    CoverURL = PageSoup.a.img['src']
    print(CoverURL)
    # Also return the URL so callers need not capture stdout (the original
    # returned None; returning a value is backward-compatible).
    return CoverURL
#<div class="comic_i_img"> | |
def MakeIndex(URL):
    """Scrape a comic page's episode index.

    Collects the <li><a> entries inside every <div class="cartoon_online_border">
    and returns a list of [relative_link, episode_title] pairs.
    """
    Page = requests.get(URL).content
    # Explicit parser: omitting it makes bs4 emit a warning and the result
    # depend on which parser is installed.
    PageSoup = BeautifulSoup(Page, 'html.parser')
    ToReturn = []
    for Border in PageSoup.findAll('div', {'class': 'cartoon_online_border'}):
        for Item in Border.findAll('li'):
            ToReturn.append([Item.a['href'], Item.a.text])
    Log('Make index success!')
    return ToReturn
def Downloader(IndexItem,Title):
    # NOTE(review): dead code — this entire function is shadowed by the complete
    # Downloader re-defined immediately below in this file; this truncated copy
    # builds the page-URL list but never downloads anything. Safe to delete.
    # Ensure the archive directories exist (bare except swallows "already exists").
    try:
        os.mkdir('_DataBase')
    except:
        pass
    try:
        os.mkdir('_DataBase/'+Title)
    except:
        pass
    # Fetch the episode reader page; IndexItem[0] is a site-relative link.
    Page=requests.get('http://manhua.dmzj.com/'+IndexItem[0])
    Page=Page.content
    PageSoup=BeautifulSoup(Page)
    #Episode=PageSoup.find('span',{'class':'redhotl'})
    Episode=IndexItem[1]
    # Computed but never used in this copy.
    List = os.listdir('_DataBase/'+Title)
    #Episode=Episode.text.replace(u'第','').replace(u'话','')
    Log('Download comic <<'+Title+'>> '+'.The episode '+Episode+'.')
    try:
        os.mkdir('_DataBase/'+Title+'/'+Episode)
    except:
        pass
    # Swap the packed reader script's 'return p' for console.log so that
    # running it under node prints the unpacked source.
    Script=PageSoup.find('script',{'type':'text/javascript'}).text.replace('return p','console.log(p)')
    FileHandle=open('Script.js','w')
    Script=Script.encode('UTF-8').strip().split('\n')[2]
    FileHandle.write(Script)
    FileHandle.close()
    RunScript='node Script.js'
    Result=subprocess.check_output(RunScript,shell=True)
    os.remove('Script.js')
    # Strip the JS wrapper down to a comma-separated list of image paths.
    URLs=Result.replace('var pages=pages=\'','').replace('"}\';','"}').replace("]';",'').replace('["','"').replace('"','').replace('\/','/').split(',')
def Downloader(IndexItem,Title):
    """Download every page image of one episode into the local archive.

    IndexItem: [relative_link, episode_title] pair as produced by MakeIndex.
    Title:     comic title, used as the on-disk folder name.

    Images are written to _DataBase/<Title>/<episode>/<Title>_<episode>_NNN.jpg.
    Requires the `node` binary on PATH to evaluate the site's packed reader
    script.
    """
    # Ensure the archive directories exist (ignore "already exists" only;
    # the original bare excepts hid every other error too).
    for Folder in ('_DataBase', '_DataBase/'+Title):
        try:
            os.mkdir(Folder)
        except OSError:
            pass
    Page=requests.get('http://manhua.dmzj.com/'+IndexItem[0]).content
    # Explicit parser avoids bs4's "no parser specified" warning.
    PageSoup=BeautifulSoup(Page,'html.parser')
    Episode=IndexItem[1]
    Log('Download comic <<'+Title+'>> '+'.The episode '+Episode+'.')
    try:
        os.mkdir('_DataBase/'+Title+'/'+Episode)
    except OSError:
        pass
    # The page list is hidden in a packed JS blob; replace its 'return p'
    # with console.log and let node print the unpacked source.
    Script=PageSoup.find('script',{'type':'text/javascript'}).text.replace('return p','console.log(p)')
    Script=Script.encode('UTF-8').strip().split('\n')[2]
    with open('Script.js','w') as FileHandle:
        FileHandle.write(Script)
    try:
        Result=subprocess.check_output('node Script.js',shell=True)
    finally:
        # Bug fix: the temp script leaked whenever node failed.
        os.remove('Script.js')
    # Strip the JS wrapper down to a comma-separated list of image paths.
    URLs=Result.replace('var pages=pages=\'','').replace('"}\';','"}').replace("]';",'').replace('["','"').replace('"','').replace('\/','/').split(',')
    Counter=1
    for URLItem in URLs:
        Log('Downloading episode '+Episode+' :The '+str(Counter)+' in '+str(len(URLs))+'.')
        ImageURL=ComicCDNHost+URLItem.replace('\n','')
        Log('Debug '+ImageURL+'.')
        File=requests.get(ImageURL,headers={'referer':'http://www.dmzj.com/','User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36'})
        if File.status_code==404:
            # Surface broken links without aborting the rest of the episode.
            print([ImageURL])
        # Zero-pad the page number to three digits so filenames sort correctly
        # (replaces the original hand-rolled padding loop).
        FileName=Title+'_'+Episode+'_'+str(Counter).zfill(3)+'.jpg'
        with open('_DataBase/'+Title+'/'+Episode+'/'+FileName,'wb') as FileHandle:
            FileHandle.write(File.content)
        Counter+=1
def StartTask(Condition,Index,Title,Path):
    """Filter the episode Index per Condition, then download each entry.

    Condition forms (case-insensitive):
      'all'          - keep every entry
      'part#N-'      - keep only the first N entries
      'part#-N'      - skip the first N entries
      'select#1,3,5' - keep the listed 1-based positions
    Any other form leaves Index untouched. Path is accepted for interface
    compatibility but is not used.
    """
    mode = Condition.lower()
    if mode != 'all':
        keyword = mode.split('#')[0]
        if keyword == 'part':
            bounds = mode.split('#')[1].split('-')
            if bounds[1] == '':
                Index = Index[:int(bounds[0])]      # 'part#N-'
            elif bounds[0] == '':
                Index = Index[int(bounds[1]):]      # 'part#-N'
            # 'part#a-b' with both ends given is a no-op, as before
        elif keyword == 'select':
            picks = mode.split('#')[1].split(',')
            Index = [Index[int(pick) - 1] for pick in picks]
    for entry in Index:
        Downloader(entry, Title)
#Index=MakeIndex('http://manhua.dmzj.com/lm12')[14:] | |
#for IndexItem in Index: | |
#Downloader(IndexItem,u'乱马50%') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment