Last active
October 27, 2017 02:15
-
-
Save shuijinliuxi/b786ddc6d22b321c489c80522a6a2885 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Tue May 17 23:05:35 2016 | |
@author: Oyc | |
""" | |
import os | |
import requests as req | |
from bs4 import BeautifulSoup as bs | |
import re | |
# Fetch the newest chapter number for a title on cncomico.
def get_last_chapter(titleNo):
    """Return the newest chapter number listed for *titleNo*.

    Scrapes the article-list page and pulls chapter URLs out of the raw
    HTML with a regex (BeautifulSoup is imported by the file but unused
    here).  Raises IndexError if no chapter link is found.
    """
    listing_url = 'http://www.cncomico.com/articleList.nhn?titleNo=' + str(titleNo)
    response = req.get(listing_url)
    response.encoding = 'utf-8'
    html = response.text
    # findall returns tuples (one item per capture group); element 0 is
    # the full chapter URL.  The newest chapter is listed first.
    matches = re.findall('"((http)s?://.*?(&vm=tit_als))"', html)
    newest_url = matches[0][0]
    # The articleNo query parameter carries the chapter number.
    return int(re.findall("articleNo=(.+?)&vm=tit_als", newest_url)[0])
# Fetch the image URLs that make up one chapter page.
def get_chapter_data(chapter_url):
    """Return the list of image URLs found on *chapter_url*.

    Downloads the chapter page and regex-scrapes every
    ``http(s)://comicimg...jpg`` URL out of the raw HTML.
    """
    rsp = req.get(chapter_url)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # Each match is a (full_url, scheme, extension) tuple because of the
    # capture groups; keep only the full URL from each.
    image_data = re.findall('"((http)s?://comicimg.*?(jpg))"', text)
    return [item[0] for item in image_data]
# Download every URL in *urls* into *dir_path*, skipping existing files.
def download_files(urls, dir_path):
    """Stream each URL in *urls* to a file under *dir_path*.

    The file name is the last path segment of the URL.  Files that
    already exist are skipped, so an interrupted run can be resumed.
    The directory is created on first use.
    """
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    for link in urls:
        book_name = link.split('/')[-1]
        full_name = os.path.join(dir_path, book_name)
        if not os.path.isfile(full_name):
            # Stream in 1 KiB chunks so large images are never held in
            # memory, and close the response when the transfer is done.
            with req.get(link, stream=True) as r:
                with open(full_name, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        # Skip keep-alive chunks that arrive empty.
                        if chunk:
                            f.write(chunk)
            print('download ' + book_name + ' finished')
# Script entry: mirror every chapter of title #1 into ./relife/.
titleNo = 1
# URL template; %d is filled in with the chapter (article) number.
base_url = 'http://www.cncomico.com/detail.nhn?titleNo=' + str(titleNo) + '&articleNo=%d&vm=tit_als'
last_chapter = get_last_chapter(titleNo)
last_index = last_chapter + 1
# Chapters are numbered from 1 up to and including the newest one.
for index in range(1, last_index):
    chapter_url = base_url % index
    print(chapter_url)
    chapter_data = get_chapter_data(chapter_url)
    download_files(chapter_data, './relife/chap' + str(index) + '/')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Tue May 17 23:05:35 2016 | |
@author: Oyc | |
""" | |
import os | |
import requests as req | |
from bs4 import BeautifulSoup as bs | |
import re | |
# Fetch the newest chapter number for a title on comico.jp.
def get_last_chapter(titleNo):
    """Return the newest chapter number listed for *titleNo*.

    Scrapes the article-list page and regex-matches chapter URLs out of
    the raw HTML (BeautifulSoup is imported by the file but unused
    here).  Raises IndexError if no chapter link is found.
    """
    page_link = 'http://www.comico.jp/articleList.nhn?titleNo=' + str(titleNo)
    rsp = req.get(page_link)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # Raw strings so \d is a regex digit class, not an (invalid) string
    # escape.  Matches are capture-group tuples; element 0 is the full
    # chapter URL, and the newest chapter is listed first.
    chapter_links = re.findall(r'"((http)s?://.*?(&articleNo=)\d.*?)"', text)
    last_chapter_link = chapter_links[0][0]
    # The articleNo query parameter holds the chapter number.
    last_chapter_num = int(re.findall(r"articleNo=(\d+)", last_chapter_link)[0])
    return last_chapter_num
# Fetch the image URLs that make up one chapter page.
def get_chapter_data(chapter_url):
    """Return the list of image URLs found on *chapter_url*.

    Downloads the chapter page and regex-scrapes every
    ``http(s)://comicimg.comico.jp/pc...jpg`` URL out of the raw HTML.
    """
    rsp = req.get(chapter_url)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # Each match is a (full_url, scheme, extension) tuple because of the
    # capture groups; keep only the full URL from each.
    image_data = re.findall('"((http)s?://comicimg.comico.jp/pc.*?(jpg))"', text)
    return [item[0] for item in image_data]
# Download every URL in *urls* into *dir_path*, skipping existing files.
def download_files(urls, dir_path):
    """Stream each URL in *urls* to a file under *dir_path*.

    The file name is the last path segment of the URL.  Files that
    already exist are skipped, so an interrupted run can be resumed.
    The directory is created on first use.
    """
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    for link in urls:
        book_name = link.split('/')[-1]
        full_name = os.path.join(dir_path, book_name)
        if not os.path.isfile(full_name):
            # Stream in 1 KiB chunks so large images are never held in
            # memory, and close the response when the transfer is done.
            with req.get(link, stream=True) as r:
                with open(full_name, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        # Skip keep-alive chunks that arrive empty.
                        if chunk:
                            f.write(chunk)
            print('download ' + book_name + ' finished')
# Script entry: mirror every chapter of title #2 into ./relife_jp/.
titleNo = 2
# URL template; %d is filled in with the chapter (article) number.
base_url = 'http://www.comico.jp/detail.nhn?titleNo=' + str(titleNo) + '&articleNo=%d'
last_chapter = get_last_chapter(titleNo)
last_index = last_chapter + 1
# Chapters are numbered from 1 up to and including the newest one.
for index in range(1, last_index):
    chapter_url = base_url % index
    print(chapter_url)
    chapter_data = get_chapter_data(chapter_url)
    download_files(chapter_data, './relife_jp/chap' + str(index) + '/')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment