|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*- |
|
|
|
from __future__ import unicode_literals |
|
import re |
|
import os |
|
from subprocess import check_output |
|
import json |
|
import sys |
|
from selenium import webdriver |
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities |
|
reload(sys) |
|
sys.setdefaultencoding("utf-8") |
|
|
|
def create_driver():
    """Start a PhantomJS session that sends a desktop-Chrome User-Agent.

    The User-Agent is supplied as a custom request header through the
    PhantomJS capabilities, and the window is resized to 1903x1016 before
    the driver is handed back.

    Returns:
        The started ``webdriver.PhantomJS`` instance. It is not closed
        here; shutting it down is left to the caller.
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/54.0.2840.99 Safari/537.36'
    )

    # Work on a copy so the shared PHANTOMJS capability dict is not mutated.
    capabilities = DesiredCapabilities.PHANTOMJS.copy()
    capabilities['phantomjs.page.customHeaders.User-Agent'] = user_agent

    browser = webdriver.PhantomJS(desired_capabilities=capabilities)
    browser.set_window_size(1903, 1016)
    return browser
|
|
|
|
|
def single_chapter(driver, url, current_directory):
    """Load one chapter page and capture its first comic image.

    The page at ``url`` is opened in ``driver``; the function waits up to
    10 seconds for the element with id ``content`` to be present, prints
    the page title, dumps the page's outer HTML to ``Comic_Naver.html``
    (relative to the process working directory), and then looks for
    ``imgcomic.naver.net`` image paths that are followed by ``" title=``.
    The first image found is opened and saved as ``try_1.png``.

    Args:
        driver: Selenium driver used for all navigation; it is not closed
            here.
        url: Address of the chapter page.
        current_directory: Accepted for interface compatibility; not used
            by this function at the moment.

    Raises:
        SystemExit: Right after the first image has been captured.
    """
    # Imported here because the module header only brings in `webdriver`
    # and `DesiredCapabilities`; without these the wait below cannot run.
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    print("%s %s" % ("Single URL : ", url))

    driver.get(url)

    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "content"))
        )
    except TimeoutException:
        # Best effort: carry on with whatever has loaded after 10 seconds.
        pass

    page_title = str(driver.title)
    print(page_title)

    root = driver.find_element_by_xpath("//*")
    page_source = root.get_attribute("outerHTML").encode('utf-8')

    # Debug dump of the rendered page; the `with` block flushes and closes.
    with open("Comic_Naver.html", "w") as dump:
        dump.write(page_source)

    # `[^"]*` stops at the closing quote of the attribute, so several image
    # tags on one line cannot be swallowed into a single match.
    image_paths = re.findall(r'imgcomic\.naver\.net([^"]*)" title=', page_source)

    for image_path in image_paths:
        ddl_image = "http://imgcomic.naver.net" + str(image_path)
        print(ddl_image)
        driver.get(ddl_image)
        driver.save_screenshot("try_1.png")
        # NOTE(review): this terminates the interpreter after the first
        # image, so later images are never reached. It looks like debugging
        # scaffolding; confirm before removing.
        sys.exit()
|
|
|
|
|
def whole_series(driver, url, current_directory):
    """Placeholder for whole-series handling; it only reports the URL.

    ``driver`` and ``current_directory`` are accepted but not used yet.
    """
    print("%s %s" % ("Whole URL : ", url))
|
|
|
|
|
def comic_naver_Url_Check(input_url, current_directory):
    """Dispatch every comic.naver.com URL found in ``input_url``.

    ``input_url`` is split on newlines. A line containing a
    ``/webtoon/detail.nhn?titleId=`` address is handed to
    ``single_chapter``; a line containing a ``/webtoon/list.nhn?titleId=``
    address is handed to ``whole_series``. Lines matching neither pattern
    are ignored.

    Args:
        input_url: One URL, or several separated by newlines.
        current_directory: Passed through unchanged to the handlers.

    Returns:
        None.
    """
    # Raw strings with escaped dots: the host and page name must match
    # literally, not "any character" in place of each dot.
    single_chapter_pattern = re.compile(
        r'https?://(?P<host>comic\.naver\.com)/webtoon/(?P<detail>detail\.nhn)'
        r'\?titleId=(?P<extra_characters>\d+)?(/|.)')
    whole_series_pattern = re.compile(
        r'https?://(?P<host>comic\.naver\.com)/webtoon/(?P<list>list\.nhn)'
        r'\?titleId=(?P<extra_characters>\d+)?(/|.)')

    for line in input_url.split('\n'):
        # Hand the handler the line that matched, not the whole input.
        url = line.strip()

        if single_chapter_pattern.search(url):
            _run_with_driver(single_chapter, url, current_directory)

        if whole_series_pattern.search(url):
            _run_with_driver(whole_series, url, current_directory)


def _run_with_driver(handler, url, current_directory):
    """Call ``handler`` with a fresh driver and always shut the driver down."""
    driver = create_driver()
    try:
        handler(driver, url, current_directory)
    except Exception as error:
        # Report the failure and continue with the remaining lines.
        print(error)
    finally:
        # Runs exactly once, including when the handler ends the program
        # through sys.exit(), so no PhantomJS process is left behind.
        driver.quit()
|
|
|
# Script entry: ask for a URL and dispatch it, passing along the directory
# the script was started from.
current_directory = str(os.getcwd())
input_url = raw_input("Enter Your URL : ")
comic_naver_Url_Check(input_url, current_directory)