Skip to content

Instantly share code, notes, and snippets.

@Xonshiz
Last active November 23, 2016 15:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Xonshiz/e8f7aee119936f9b66c2c752200d49ad to your computer and use it in GitHub Desktop.
Save Xonshiz/e8f7aee119936f9b66c2c752200d49ad to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import os
from subprocess import check_output
import json
import sys
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Python 2 only: re-import ``sys`` and then switch the interpreter-wide default
# string encoding to UTF-8.
# NOTE(review): presumably needed so the implicit str()/unicode conversions
# further down accept non-ASCII page text -- confirm before removing.
reload(sys)
sys.setdefaultencoding("utf-8")
def create_driver():
    """Build a PhantomJS WebDriver that sends a desktop Chrome User-Agent.

    Returns:
        The ``webdriver.PhantomJS`` instance, with its window resized to
        1903x1016.
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/54.0.2840.99 Safari/537.36'
    )

    # Work on a copy so the shared PHANTOMJS capability dict is not mutated.
    capabilities = DesiredCapabilities.PHANTOMJS.copy()
    capabilities['phantomjs.page.customHeaders.User-Agent'] = user_agent

    browser = webdriver.PhantomJS(desired_capabilities=capabilities)
    browser.set_window_size(1903, 1016)
    return browser
def single_chapter(driver, url, current_directory):
    """Load one chapter page, dump its HTML and screenshot its images.

    The page at *url* is opened in *driver*, its full HTML is written to
    ``Comic_Naver.html`` in the working directory, and every
    ``imgcomic.naver.net`` path found in that HTML (the text between the host
    name and the following ``" title=``) is opened in the browser and captured
    to ``try_1.png``.  The process is then terminated with ``sys.exit()``.

    Args:
        driver: Selenium WebDriver used for every page load.
        url: Address of the chapter page.
        current_directory: Accepted for signature compatibility; not used.

    Raises:
        SystemExit: Always, once the image links have been processed.
    """
    # Imported here because the module header only pulls in ``webdriver`` and
    # ``DesiredCapabilities``; without these names the wait below raised a
    # NameError that was silently swallowed, so it never actually waited.
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    print("%s %s" % ("Single URL : ", url))
    driver.get(url)

    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "content"))
        )
    except TimeoutException:
        # Best effort: scrape whatever has loaded rather than giving up.
        print("Timed out waiting for the 'content' element; continuing.")

    page_title = str(driver.title)
    print(page_title)

    # Keep the markup as text for the regex below; encode only when writing.
    page_source = driver.find_element_by_xpath("//*").get_attribute("outerHTML")
    with open("Comic_Naver.html", "wb") as html_file:
        html_file.write(page_source.encode('utf-8'))

    # Non-greedy so that several images on one line yield one match each
    # instead of a single match spanning from the first to the last.
    image_paths = re.findall(r'imgcomic\.naver\.net(.*?)" title=', page_source)
    for image_path in image_paths:
        ddl_image = "http://imgcomic.naver.net" + image_path
        print(ddl_image)
        driver.get(ddl_image)
        driver.save_screenshot("try_1.png")

    # NOTE(review): the original indentation was lost, so it is unclear whether
    # this exit belonged inside the loop (stop after the first image) or after
    # it; it is placed after the loop here -- confirm.
    sys.exit()
def whole_series(driver, url, current_directory):
    """Placeholder for downloading a whole series; it only echoes the URL.

    Args:
        driver: Accepted but not used yet.
        url: Address of the series listing page; printed to stdout.
        current_directory: Accepted but not used yet.
    """
    # Two fields joined by one space, matching ``print a, b`` output.
    print("%s %s" % ("Whole URL : ", url))
# Chapter page: http(s)://comic.naver.com/webtoon/detail.nhn?titleId=<digits>...
_SINGLE_CHAPTER_URL = re.compile(
    r'https?://(?P<host>comic\.naver\.com)/webtoon/(?P<detail>detail\.nhn)'
    r'\?titleId=(?P<extra_characters>\d+)?(/|.)')
# Series listing: http(s)://comic.naver.com/webtoon/list.nhn?titleId=<digits>...
_WHOLE_SERIES_URL = re.compile(
    r'https?://(?P<host>comic\.naver\.com)/webtoon/(?P<list>list\.nhn)'
    r'\?titleId=(?P<extra_characters>\d+)?(/|.)')


def comic_naver_Url_Check(input_url, current_directory):
    """Dispatch each line of *input_url* to the matching download routine.

    A line containing a ``detail.nhn`` URL is handed to ``single_chapter``
    with a freshly created driver; a line containing a ``list.nhn`` URL is
    handed to ``whole_series``.  Lines matching neither pattern are ignored.

    Args:
        input_url: One or more URLs separated by newlines.
        current_directory: Directory path forwarded to the download routines.

    Returns:
        None.
    """
    for line in input_url.split('\n'):
        # Forward the line that matched, not the whole multi-line input.
        url = line.strip()

        if _SINGLE_CHAPTER_URL.search(url):
            driver = create_driver()
            try:
                single_chapter(driver, url, current_directory)
            except Exception as e:
                print(e)
            finally:
                # Quit exactly once, and also when single_chapter() leaves
                # via sys.exit(), which ``except Exception`` does not catch.
                driver.quit()

        if _WHOLE_SERIES_URL.search(url):
            # whole_series() is still a stub that never touches the browser,
            # so no driver is started (the old "LOL" placeholder crashed on
            # its .quit() call).
            whole_series(None, url, current_directory)
if __name__ == "__main__":
    # Prompt for a URL and hand it, together with the current working
    # directory, to the URL dispatcher.  ``raw_input`` is kept because the
    # rest of this script targets Python 2.
    current_directory = os.getcwd()
    input_url = raw_input("Enter Your URL : ")
    comic_naver_Url_Check(input_url, current_directory)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment