Skip to content

Instantly share code, notes, and snippets.

/DMZJ_JP.py Secret

Created December 24, 2015 11:34
Show Gist options
  • Save anonymous/c95fd30a078b74e83e32 to your computer and use it in GitHub Desktop.
Save anonymous/c95fd30a078b74e83e32 to your computer and use it in GitHub Desktop.
#-*- coding:utf-8 -*-
import requests
import subprocess
import os
import json
import time
from bs4 import BeautifulSoup
# Working-directory setting; not referenced by any code visible in this file.
WorkSpace='.'
# Base URL that Downloader prepends to every image path before fetching it.
ComicCDNHost='http://images.dmzj.com/'
def Log(String):
    """Print *String* to stdout, prefixed with the current local time.

    The prefix is the local time formatted as ``%I:%M:%S`` (12-hour clock)
    followed by ``' - '``.
    """
    # A parenthesised single argument prints exactly like the Python 2 print
    # statement and is also a valid Python 3 call.
    print(time.strftime('%I:%M:%S', time.localtime(time.time())) + ' - ' + String)
def xxxMakeFileSystemReady():
    """Create the ``_DataBase`` directory in the current working directory.

    Does nothing when the directory already exists.
    """
    # os.mkdir is used rather than shelling out to ``md``: it is portable and
    # does not depend on an unqualified ``system`` name being in scope.
    if not os.path.isdir('_DataBase'):
        os.mkdir('_DataBase')
def xxxGenerateUID(URL):
    """Extract the comic identifier from a www.dmzj.com info or view URL.

    Examples of accepted URLs:
        http://www.dmzj.com/info/zhenlizhitu.html
        http://www.dmzj.com/view/zhenlizhitu/37273.html

    Returns the identifier (``'zhenlizhitu'`` for both examples above), or
    the string ``'Error!'`` when the URL does not contain the dmzj host or
    contains neither an ``/info/`` nor a ``/view/`` path segment.
    """
    if 'http://www.dmzj.com/' not in URL:
        return 'Error!'
    if '/info/' in URL:
        # .../info/<uid>.html
        return URL.split('/info/')[1].split('.')[0]
    if '/view/' in URL:
        # .../view/<uid>/<chapter>.html
        return URL.split('/view/')[1].split('/')[0]
    # Neither marker present: report failure with the same sentinel used for
    # a foreign host instead of raising IndexError.
    return 'Error!'
def xxxCreatURL(UID):
    """Build the info-page URL for the comic identified by *UID*.

    Example result: http://www.dmzj.com/info/zhenlizhitu.html
    """
    Pieces = ['http://www.dmzj.com/info/', UID, '.html']
    return ''.join(Pieces)
def GetCover(URL):
    """Fetch the page at *URL*, then print and return its cover image URL.

    The cover is read from the ``src`` attribute of the ``<img>`` inside the
    link found in ``<div class="comic_i_img">``.

    Returns the cover URL, or ``None`` (printing nothing) when the page does
    not contain that div/link/image structure.
    """
    Page = requests.get(URL).content
    CoverBox = BeautifulSoup(Page).find('div', {'class': 'comic_i_img'})
    # Guard each level so a page without the expected markup yields None
    # instead of an AttributeError/TypeError on a missing tag.
    if CoverBox is None or CoverBox.a is None or CoverBox.a.img is None:
        return None
    CoverURL = CoverBox.a.img['src']
    print(CoverURL)
    return CoverURL
def MakeIndex(URL):
    """Collect the chapter links listed on the comic page at *URL*.

    Every ``<li>`` inside each ``<div class="cartoon_online_border">`` is
    read; the result is a list of ``[href, link text]`` pairs in page order.
    A success message is logged before returning.
    """
    Soup = BeautifulSoup(requests.get(URL).content)
    Borders = Soup.findAll('div', {'class': 'cartoon_online_border'})
    Entries = [
        [Entry.a['href'], Entry.a.text]
        for Border in Borders
        for Entry in Border.findAll('li')
    ]
    Log('Make index success!')
    return Entries
def _EnsureDirectory(DirPath):
    """Create *DirPath* (including missing parents) unless it already exists."""
    if not os.path.isdir(DirPath):
        os.makedirs(DirPath)


def _ExtractImageURLs(PageSoup):
    """Return the list of image paths published by a chapter page's script.

    The first ``<script type="text/javascript">`` element is taken, its
    ``return p`` is rewritten to ``console.log(p)``, and the third line of the
    stripped script text is executed with ``node``. The captured output
    (presumably the unpacked ``var pages=...`` assignment) is reduced to a
    comma-separated list by the replace chain below.
    """
    Script = PageSoup.find('script', {'type': 'text/javascript'}).text
    Script = Script.replace('return p', 'console.log(p)')
    Script = Script.encode('UTF-8').strip().split('\n')[2]
    # SECURITY NOTE(review): this runs JavaScript downloaded from the remote
    # page through node with the privileges of the current user. Consider
    # unpacking the script without executing it.
    try:
        with open('Script.js', 'w') as FileHandle:
            FileHandle.write(Script)
        # Argument list instead of a shell string: no shell is needed to
        # launch a fixed command.
        Result = subprocess.check_output(['node', 'Script.js'])
    finally:
        # Remove the scratch file even when writing or node fails.
        if os.path.exists('Script.js'):
            os.remove('Script.js')
    return (Result.replace('var pages=pages=\'', '')
            .replace('"}\';', '"}')
            .replace("]';", '')
            .replace('["', '"')
            .replace('"', '')
            .replace('\\/', '/')
            .split(','))


def Downloader(IndexItem,Title):
    """Download every page image of one chapter into ``_DataBase``.

    IndexItem -- ``[link, episode name]`` pair as produced by MakeIndex; the
                 link is appended to ``http://manhua.dmzj.com/``.
    Title     -- comic title; used as the directory name and file-name prefix.

    Images are saved as
    ``_DataBase/<Title>/<Episode>/<Title>_<Episode>_<NNN>.jpg`` where ``NNN``
    is the 1-based page number zero-padded to at least three digits. Pages
    answered with HTTP 404 are reported on stdout and skipped; their page
    number is still consumed so the numbering of later pages is unchanged.
    """
    Episode = IndexItem[1]
    EpisodeDir = '_DataBase/' + Title + '/' + Episode

    Page = requests.get('http://manhua.dmzj.com/' + IndexItem[0]).content
    PageSoup = BeautifulSoup(Page)
    Log('Download comic <<'+Title+'>> '+'.The episode '+Episode+'.')
    # Creates _DataBase, _DataBase/<Title> and the episode directory at once;
    # genuine failures (e.g. permissions) surface here instead of being
    # swallowed and reappearing later as an unrelated open() error.
    _EnsureDirectory(EpisodeDir)

    URLs = _ExtractImageURLs(PageSoup)
    Headers = {'referer':'http://www.dmzj.com/','User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36'}
    Total = len(URLs)
    for Counter, URLItem in enumerate(URLs, 1):
        ImageURL = ComicCDNHost + URLItem.replace('\n', '')
        Log('Downloading episode '+Episode+' :The '+str(Counter)+' in '+str(Total)+'.')
        Log('Debug '+ImageURL+'.')
        File = requests.get(ImageURL, headers=Headers)
        if File.status_code == 404:
            print([ImageURL])
            # Do not store the error page body under a .jpg name.
            continue
        # zfill pads to three digits and leaves longer numbers untouched, so
        # chapters with 1000+ pages cannot loop forever while padding.
        FileName = Title + '_' + Episode + '_' + str(Counter).zfill(3) + '.jpg'
        with open(EpisodeDir + '/' + FileName, 'wb') as FileHandle:
            FileHandle.write(File.content)
def StartTask(Condition,Index,Title,Path):
    """Filter *Index* according to *Condition* and download what remains.

    Condition is matched case-insensitively:
      ``All``          -- keep the whole index.
      ``Part#N-``      -- keep the first N entries (``Index[:N]``).
      ``Part#-N``      -- drop the first N entries (``Index[N:]``).
      ``Part#A-B``     -- index left unchanged.
      ``Select#a,b,c`` -- keep the entries at the given 1-based positions,
                          in the order listed.
    Any other condition leaves the index unchanged.

    Downloader(IndexItem, Title) is then called for every remaining entry.
    Path is accepted but not used by this function.
    """
    Spec = Condition.lower()
    if Spec != 'all':
        Mode = Spec.split('#')[0]
        if Mode == 'part':
            Bounds = Spec.split('#')[1].split('-')
            if Bounds[1] == '':
                Index = Index[:int(Bounds[0])]
            elif Bounds[0] == '':
                Index = Index[int(Bounds[1]):]
        elif Mode == 'select':
            Positions = Spec.split('#')[1].split(',')
            Index = [Index[int(Position) - 1] for Position in Positions]
    for IndexItem in Index:
        Downloader(IndexItem, Title)
#Index=MakeIndex('http://manhua.dmzj.com/lm12')[14:]
#for IndexItem in Index:
#Downloader(IndexItem,u'乱马50%')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment