Skip to content

Instantly share code, notes, and snippets.

/0.py

Created January 8, 2016 19:32
Show Gist options
Save anonymous/4b8fd3bfe0d4560d09a9 to your computer and use it in GitHub Desktop.
#-*- coding:utf-8 -*-
import ast
import os
import re
import subprocess
import tempfile
import time
import webbrowser

import requests
# Matches a dm5.com chapter URL; group 1 captures the numeric id after "/m".
# Dots are escaped so they only match literal dots, and at least one digit is
# required so an id-less URL such as ".../m/" is rejected.
RE_URL = r"http://www\.dm5\.com/m([0-9]+)/"
# Matches page links of the form "m<id>-p<n>/"; group 1 captures <n>.
# Requiring at least one digit keeps int() from ever seeing an empty string.
RE_PAGE = r'm[0-9]+-p([0-9]+)/'
# %-template for the per-page script endpoint; filled with (id, id, page).
info_base = 'http://www.dm5.com/m%s/chapterfun.ashx?cid=%s&page=%s'
def _node_available():
    """Return True when a ``node`` executable can be started from PATH."""
    # Merely opening a pipe to a command never fails, so actually run
    # ``node --version`` and look at how the launch went.
    try:
        proc = subprocess.Popen(['node', '--version'],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError:
        return False
    proc.communicate()
    return proc.returncode == 0


def _run_page_script(script):
    """Run one downloaded page script through node and return its list.

    ``script`` is the raw response body (bytes). It is wrapped in
    ``console.log(...)``, executed by node from a private temporary file, and
    whatever node prints is parsed as a Python literal. Returns ``[]`` (after
    printing the raw output) when the output is not a list literal.

    SECURITY: this executes JavaScript downloaded from the remote server with
    the privileges of the current user. Only use it against a site you trust.
    """
    fd, path = tempfile.mkstemp(suffix='.js')
    try:
        with os.fdopen(fd, 'wb') as handle:
            handle.write(b'console.log(' + script.strip() + b')')
        proc = subprocess.Popen(['node', path],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        output = proc.communicate()[0]
    finally:
        # Always clean up, even if writing or launching node failed.
        os.remove(path)

    # literal_eval only accepts plain literals, so text derived from remote
    # content cannot run arbitrary Python here (unlike eval()).
    try:
        result = ast.literal_eval(output.strip())
    except (ValueError, TypeError, SyntaxError):
        print(output)
        return []
    if not isinstance(result, (list, tuple)):
        print(output)
        return []
    return list(result)


def get_url(url):
    """Collect the evaluated results of every page script of a dm5.com chapter.

    The chapter page is downloaded, the highest page number linked from it
    (via ``RE_PAGE``) is determined, and for each page ``1..N`` the
    ``chapterfun.ashx`` script is fetched and evaluated with node. The lists
    printed by node are concatenated in page order.
    NOTE(review): the list items are presumably image URLs - confirm.

    Args:
        url: Chapter URL; must match ``RE_URL`` with a non-empty id.

    Returns:
        A list with the concatenated per-page results on success. On failure
        (node missing or ``url`` not recognised) the tuple ``('E', [])`` is
        returned instead, after printing an error message; this asymmetric
        contract is kept for backward compatibility.

    Side effects:
        Opens ``url`` in the default web browser and sleeps for 3 seconds,
        performs HTTP requests, prints every downloaded script, and runs node.
    """
    if not _node_available():
        print('Error - node required')
        return 'E', []

    ids = re.findall(RE_URL, url)
    if not ids or not ids[0]:
        print('Error - illegal url')
        return 'E', []
    chapter_id = ids[0]

    # NOTE(review): why the page is opened in a browser first is not evident
    # from this file; kept as-is - confirm before removing.
    webbrowser.open_new(url)
    time.sleep(3)

    session = requests.session()
    page = session.get(url).text
    # Skip empty captures so int() cannot fail on a malformed link.
    page_numbers = [int(no) for no in re.findall(RE_PAGE, page) if no]
    last_page = max(page_numbers) if page_numbers else 0

    return_list = []
    for i in range(1, last_page + 1):
        page_url = info_base % (chapter_id, chapter_id, i)
        script = session.get(page_url).content
        print(script)  # debug output of the raw script, as before
        # A page whose script cannot be evaluated contributes nothing instead
        # of crashing or repeating the previous page's entries.
        return_list.extend(_run_page_script(script))
    return return_list
if __name__ == '__main__':
    # Example run against a sample chapter. Guarded so that importing this
    # module does not open a browser or issue network requests.
    get_url('http://www.dm5.com/m178597/')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment