Last active
October 27, 2017 02:15
-
-
Save shuijinliuxi/b786ddc6d22b321c489c80522a6a2885 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Tue May 17 23:05:35 2016 | |
@author: Oyc | |
""" | |
import os | |
import requests as req | |
from bs4 import BeautifulSoup as bs | |
import re | |
# Fetch the newest chapter number for a title on cncomico.
def get_last_chapter(titleNo):
    """Return the newest chapter number listed for *titleNo*.

    Scrapes the article-list page and pulls chapter URLs out of the raw
    HTML with a regex (BeautifulSoup is imported by the file but unused
    here).  Raises IndexError if no chapter link is found.
    """
    listing_url = 'http://www.cncomico.com/articleList.nhn?titleNo=' + str(titleNo)
    response = req.get(listing_url)
    response.encoding = 'utf-8'
    html = response.text
    # findall returns tuples (one item per capture group); element 0 is
    # the full chapter URL.  The newest chapter is listed first.
    matches = re.findall('"((http)s?://.*?(&vm=tit_als))"', html)
    newest_url = matches[0][0]
    # The articleNo query parameter carries the chapter number.
    return int(re.findall("articleNo=(.+?)&vm=tit_als", newest_url)[0])
# Fetch the image URLs that make up one chapter page.
def get_chapter_data(chapter_url):
    """Return the list of image URLs found on *chapter_url*.

    Downloads the chapter page and regex-scrapes every
    ``http(s)://comicimg...jpg`` URL out of the raw HTML.
    """
    rsp = req.get(chapter_url)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # Each match is a (full_url, scheme, extension) tuple because of the
    # capture groups; keep only the full URL from each.
    image_data = re.findall('"((http)s?://comicimg.*?(jpg))"', text)
    return [item[0] for item in image_data]
# Download every URL in *urls* into *dir_path*, skipping existing files.
def download_files(urls, dir_path):
    """Stream each URL in *urls* to a file under *dir_path*.

    The file name is the last path segment of the URL.  Files that
    already exist are skipped, so an interrupted run can be resumed.
    The directory is created on first use.
    """
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    for link in urls:
        book_name = link.split('/')[-1]
        full_name = os.path.join(dir_path, book_name)
        if not os.path.isfile(full_name):
            # Stream in 1 KiB chunks so large images are never held in
            # memory, and close the response when the transfer is done.
            with req.get(link, stream=True) as r:
                with open(full_name, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        # Skip keep-alive chunks that arrive empty.
                        if chunk:
                            f.write(chunk)
            print('download ' + book_name + ' finished')
# Script entry: mirror every chapter of title #1 into ./relife/.
titleNo = 1
# URL template; %d is filled in with the chapter (article) number.
base_url = 'http://www.cncomico.com/detail.nhn?titleNo=' + str(titleNo) + '&articleNo=%d&vm=tit_als'
last_chapter = get_last_chapter(titleNo)
last_index = last_chapter + 1
# Chapters are numbered from 1 up to and including the newest one.
for index in range(1, last_index):
    chapter_url = base_url % index
    print(chapter_url)
    chapter_data = get_chapter_data(chapter_url)
    download_files(chapter_data, './relife/chap' + str(index) + '/')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Tue May 17 23:05:35 2016 | |
@author: Oyc | |
""" | |
import os | |
import requests as req | |
from bs4 import BeautifulSoup as bs | |
import re | |
# Fetch the newest chapter number for a title on comico.jp.
def get_last_chapter(titleNo):
    """Return the newest chapter number listed for *titleNo*.

    Scrapes the article-list page and regex-matches chapter URLs out of
    the raw HTML (BeautifulSoup is imported by the file but unused
    here).  Raises IndexError if no chapter link is found.
    """
    page_link = 'http://www.comico.jp/articleList.nhn?titleNo=' + str(titleNo)
    rsp = req.get(page_link)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # Raw strings so \d is a regex digit class, not an (invalid) string
    # escape.  Matches are capture-group tuples; element 0 is the full
    # chapter URL, and the newest chapter is listed first.
    chapter_links = re.findall(r'"((http)s?://.*?(&articleNo=)\d.*?)"', text)
    last_chapter_link = chapter_links[0][0]
    # The articleNo query parameter holds the chapter number.
    last_chapter_num = int(re.findall(r"articleNo=(\d+)", last_chapter_link)[0])
    return last_chapter_num
# Fetch the image URLs that make up one chapter page.
def get_chapter_data(chapter_url):
    """Return the list of image URLs found on *chapter_url*.

    Downloads the chapter page and regex-scrapes every
    ``http(s)://comicimg.comico.jp/pc...jpg`` URL out of the raw HTML.
    """
    rsp = req.get(chapter_url)
    rsp.encoding = 'utf-8'
    text = rsp.text
    # Each match is a (full_url, scheme, extension) tuple because of the
    # capture groups; keep only the full URL from each.
    image_data = re.findall('"((http)s?://comicimg.comico.jp/pc.*?(jpg))"', text)
    return [item[0] for item in image_data]
# Download every URL in *urls* into *dir_path*, skipping existing files.
def download_files(urls, dir_path):
    """Stream each URL in *urls* to a file under *dir_path*.

    The file name is the last path segment of the URL.  Files that
    already exist are skipped, so an interrupted run can be resumed.
    The directory is created on first use.
    """
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    for link in urls:
        book_name = link.split('/')[-1]
        full_name = os.path.join(dir_path, book_name)
        if not os.path.isfile(full_name):
            # Stream in 1 KiB chunks so large images are never held in
            # memory, and close the response when the transfer is done.
            with req.get(link, stream=True) as r:
                with open(full_name, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        # Skip keep-alive chunks that arrive empty.
                        if chunk:
                            f.write(chunk)
            print('download ' + book_name + ' finished')
# Script entry: mirror every chapter of title #2 into ./relife_jp/.
titleNo = 2
# URL template; %d is filled in with the chapter (article) number.
base_url = 'http://www.comico.jp/detail.nhn?titleNo=' + str(titleNo) + '&articleNo=%d'
last_chapter = get_last_chapter(titleNo)
last_index = last_chapter + 1
# Chapters are numbered from 1 up to and including the newest one.
for index in range(1, last_index):
    chapter_url = base_url % index
    print(chapter_url)
    chapter_data = get_chapter_data(chapter_url)
    download_files(chapter_data, './relife_jp/chap' + str(index) + '/')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment