@shuijinliuxi · Last active October 27, 2017
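Two small comico scrapers, apparently for the ReLIFE series judging by the output folders: the first targets the Chinese site (cncomico.com), the second the Japanese site (comico.jp). Each reads the chapter list page to find the newest chapter number, then walks every chapter and saves its .jpg pages with requests.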
# -*- coding: utf-8 -*-
"""
Created on Tue May 17 23:05:35 2016
@author: Oyc
"""
import os
import re

import requests as req


# Fetch the latest chapter number
def get_last_chapter(titleNo):
    page_link = 'http://www.cncomico.com/articleList.nhn?titleNo=' + str(titleNo)
    rsp = req.get(page_link)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # Chapter links on the list page end with &vm=tit_als; the newest comes first
    chapter_links = re.findall(r'"(https?://.*?&vm=tit_als)"', text)
    last_chapter_link = chapter_links[0]
    last_chapter_num = int(re.findall(r'articleNo=(\d+)&vm=tit_als', last_chapter_link)[0])
    return last_chapter_num


# Get the contents (image links) of a given chapter
def get_chapter_data(chapter_url):
    rsp = req.get(chapter_url)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # Page images are served from the comicimg host as .jpg files
    image_links = re.findall(r'"(https?://comicimg.*?jpg)"', text)
    return image_links


# Download the given links into dir_path, skipping files that already exist
def download_files(urls, dir_path):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    for link in urls:
        book_name = link.split('/')[-1]
        full_name = os.path.join(dir_path, book_name)
        if not os.path.isfile(full_name):
            r = req.get(link, stream=True)
            with open(full_name, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            print('download ' + book_name + ' finished')


titleNo = 1
base_url = 'http://www.cncomico.com/detail.nhn?titleNo=' + str(titleNo) + '&articleNo=%d&vm=tit_als'
last_chapter = get_last_chapter(titleNo)
for index in range(1, last_chapter + 1):
    chapter_url = base_url % index
    print(chapter_url)
    chapter_data = get_chapter_data(chapter_url)
    download_files(chapter_data, './relife/chap' + str(index) + '/')
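The second script is the same downloader pointed at the Japanese comico site; only the URLs, the link regexes, and the output folder change.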
# -*- coding: utf-8 -*-
"""
Created on Tue May 17 23:05:35 2016
@author: Oyc
"""
import os
import re

import requests as req


# Fetch the latest chapter number
def get_last_chapter(titleNo):
    page_link = 'http://www.comico.jp/articleList.nhn?titleNo=' + str(titleNo)
    rsp = req.get(page_link)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # Chapter links on the list page carry an articleNo parameter; the newest comes first
    chapter_links = re.findall(r'"(https?://.*?&articleNo=\d.*?)"', text)
    last_chapter_link = chapter_links[0]
    last_chapter_num = int(re.findall(r'articleNo=(\d+)', last_chapter_link)[0])
    return last_chapter_num


# Get the contents (image links) of a given chapter
def get_chapter_data(chapter_url):
    rsp = req.get(chapter_url)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # PC-sized page images are served from comicimg.comico.jp as .jpg files
    image_links = re.findall(r'"(https?://comicimg\.comico\.jp/pc.*?jpg)"', text)
    return image_links


# Download the given links into dir_path, skipping files that already exist
def download_files(urls, dir_path):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    for link in urls:
        book_name = link.split('/')[-1]
        full_name = os.path.join(dir_path, book_name)
        if not os.path.isfile(full_name):
            r = req.get(link, stream=True)
            with open(full_name, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            print('download ' + book_name + ' finished')


titleNo = 2
base_url = 'http://www.comico.jp/detail.nhn?titleNo=' + str(titleNo) + '&articleNo=%d'
last_chapter = get_last_chapter(titleNo)
for index in range(1, last_chapter + 1):
    chapter_url = base_url % index
    print(chapter_url)
    chapter_data = get_chapter_data(chapter_url)
    download_files(chapter_data, './relife_jp/chap' + str(index) + '/')
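Since the two scripts differ only in URLs, regexes, and output folder, they could be folded into one parameterized loop. A minimal sketch, assuming the definitions above (download_files, the req/re imports) are in scope; SITES and crawl_title are illustrative names, not part of the original gist:

# Hypothetical refactor sketch: one crawler, parameterized per site.
# SITES and crawl_title are illustrative names, not part of the original gist.
SITES = {
    'cn': {'detail': 'http://www.cncomico.com/detail.nhn?titleNo=%d&articleNo=%d&vm=tit_als',
           'image': r'"(https?://comicimg.*?jpg)"',
           'out': './relife/chap%d/'},
    'jp': {'detail': 'http://www.comico.jp/detail.nhn?titleNo=%d&articleNo=%d',
           'image': r'"(https?://comicimg\.comico\.jp/pc.*?jpg)"',
           'out': './relife_jp/chap%d/'},
}

def crawl_title(site, titleNo, last_chapter):
    cfg = SITES[site]
    for index in range(1, last_chapter + 1):
        chapter_url = cfg['detail'] % (titleNo, index)
        print(chapter_url)
        rsp = req.get(chapter_url)
        rsp.encoding = 'utf-8'
        # Same extraction as get_chapter_data, with the pattern supplied per site
        image_links = re.findall(cfg['image'], rsp.text)
        download_files(image_links, cfg['out'] % index)

# e.g. crawl_title('jp', 2, get_last_chapter(2)) with the comico.jp
# get_last_chapter defined above in scope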