nomadekr/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Azure Function을 활용해서 네이버 웹툰 업데이트 현황을 크롤링하여 HTML로 보여주기


새 함수 앱

Http Trigger/Output으로 생성
권한수준은 Anonymous로 지정


생성된 함수앱의 통합 메뉴

선택한 HTTP 메서드에 GET 추가


동작 페이지 : https://snu2017.azurewebsites.net/api/HttpTriggerPython31
실행에 앞서

파이썬 3 및 라이브러리 설치가 필요합니다. 함수 앱 > 플랫폼 기능 > 고급 도구 (KUDU) > Debug Console > CMD 에서 아래 명령을 하나씩 실행해 주세요.
# Python 3.6 설치
쉘> nuget.exe install -Source https://www.siteextensions.net/api/v2/ -OutputDirectory D:\home\site\tools python361x64

# Azure Function에서 접근가능한 경로에 Python 명령 복사
쉘> mv /d/home/site/tools/python361x64.3.6.1.3/content/python361x64/* /d/home/site/tools/

# 파이썬 패키지 설치
쉘> d:/home/site/tools/python -m pip install requests beautifulsoup4 jinja2


AskDjango 페이스북 그룹

  
## list.html
<!DOCTYPE html>
{#
  Jinja2 template for the "today's Naver webtoons" page.

  Context:
    comic_list -- iterable of items exposing `title`, `url` and `img.src`.

  Every interpolated value goes through the `e` filter so that text scraped
  from a third-party page cannot inject markup, whether or not the caller
  enables autoescaping (already-escaped values are not escaped twice).
#}
<html lang="ko">
<head>
<meta charset="utf-8" />
<title>오늘의 네이버 웹툰</title>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/cosmo/bootstrap.min.css" />
</head>
<body>

<div class="container">
    <div class="row">
        <div class="col-sm-12">
            <div class="jumbotron">
                <h1>오늘의 네이버 웹툰</h1>
                <p>
                    {# rel="noopener noreferrer": the opened page must not get a handle on this window. #}
                    <a class="btn btn-primary btn-lg" href="http://comic.naver.com/webtoon/weekday.nhn" target="_blank" rel="noopener noreferrer">
                    사이트로 이동
                    </a>
                </p>
            </div>

            <div class="panel panel-default">
                <div class="panel-body">
                    {% for comic in comic_list %}
                        <div class="media">
                            <div class="media-left">
                                <a href="{{ comic.url|e }}" target="_blank" rel="noopener noreferrer">
                                    <img class="media-object" src="{{ comic.img.src|e }}" alt="{{ comic.title|e }}" />
                                </a>
                            </div>
                            <div class="media-body">
                                <h4 class="media-heading">{{ comic.title|e }}</h4>
                            </div>
                        </div>
                    {% else %}
                        {# Shown when the scraper found nothing, instead of an empty panel. #}
                        <p class="text-muted">오늘 업데이트된 웹툰이 없습니다.</p>
                    {% endfor %}
                </div>
                <div class="panel-footer">
                    &copy; 2017. 여러분의 이름.
                </div>
            </div>
        </div>
    </div>
</div>

</body>

</html>

## run.py
import os
import json
import jinja2
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin


def main():
    """Scrape the Naver webtoon weekday page and write an HTML response file.

    Fetches the weekday listing, keeps every ``list.nhn`` link that contains
    an element with class ``ico_updt`` (presumably the "updated" badge --
    confirm against the live markup), renders the collected entries through
    the ``list.html`` Jinja2 template, and writes a JSON document with
    ``status``, ``body`` and ``headers`` keys to the file named by the
    ``res`` environment variable.

    Raises:
        requests.RequestException: the listing page could not be fetched
            within the timeout.
        KeyError: the ``res`` environment variable is not set.
        OSError: ``list.html`` could not be read or the response file could
            not be written.
    """
    list_url = 'http://comic.naver.com/webtoon/weekday.nhn'
    # Bound the wait so a stalled upstream cannot hang the function forever.
    html = requests.get(list_url, timeout=10).text
    soup = BeautifulSoup(html, 'html.parser')

    comic_list = []

    for a_tag in soup.select('a[href*=list.nhn]'):
        if not a_tag.select('.ico_updt'):
            continue

        img_tag = a_tag.find('img')
        # Skip links without a usable thumbnail instead of crashing on
        # markup that differs from what the scraper expects.
        if img_tag is None or not img_tag.get('src'):
            continue

        comic_list.append({
            'title': img_tag.get('title', ''),
            'url': urljoin(list_url, a_tag['href']),
            'img': {
                'src': img_tag['src'],
            },
        })

    # Read the template through a context manager so the handle is closed.
    with open('list.html', 'rt', encoding='utf8') as template_file:
        template_string = template_file.read()

    # The rendered values come from a third-party page, so treat them as
    # untrusted and let Jinja2 escape them.
    template = jinja2.Template(template_string, autoescape=True)
    body = template.render(comic_list=comic_list)

    # Response structure: status code, body and headers.
    return_dict = {
        "status": 200,
        "body": body,
        "headers": {
            "Content-Type": "text/html"
        }
    }

    # Open the output only after rendering succeeded; opening it earlier
    # would leave a truncated, empty response file behind on a render error.
    res_path = os.environ['res']
    with open(res_path, 'wt', encoding='utf8') as f:
        f.write(json.dumps(return_dict))


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
	<!DOCTYPE html>
	{#
	  Jinja2 template for the "today's Naver webtoons" page.

	  Context:
	    comic_list -- iterable of items exposing `title`, `url` and `img.src`.

	  Every interpolated value goes through the `e` filter so that text scraped
	  from a third-party page cannot inject markup, whether or not the caller
	  enables autoescaping (already-escaped values are not escaped twice).
	#}
	<html lang="ko">
	<head>
	<meta charset="utf-8" />
	<title>오늘의 네이버 웹툰</title>
	<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/cosmo/bootstrap.min.css" />
	</head>
	<body>

	<div class="container">
	    <div class="row">
	        <div class="col-sm-12">
	            <div class="jumbotron">
	                <h1>오늘의 네이버 웹툰</h1>
	                <p>
	                    {# rel="noopener noreferrer": the opened page must not get a handle on this window. #}
	                    <a class="btn btn-primary btn-lg" href="http://comic.naver.com/webtoon/weekday.nhn" target="_blank" rel="noopener noreferrer">
	                    사이트로 이동
	                    </a>
	                </p>
	            </div>

	            <div class="panel panel-default">
	                <div class="panel-body">
	                    {% for comic in comic_list %}
	                        <div class="media">
	                            <div class="media-left">
	                                <a href="{{ comic.url|e }}" target="_blank" rel="noopener noreferrer">
	                                    <img class="media-object" src="{{ comic.img.src|e }}" alt="{{ comic.title|e }}" />
	                                </a>
	                            </div>
	                            <div class="media-body">
	                                <h4 class="media-heading">{{ comic.title|e }}</h4>
	                            </div>
	                        </div>
	                    {% else %}
	                        {# Shown when the scraper found nothing, instead of an empty panel. #}
	                        <p class="text-muted">오늘 업데이트된 웹툰이 없습니다.</p>
	                    {% endfor %}
	                </div>
	                <div class="panel-footer">
	                    © 2017. 여러분의 이름.
	                </div>
	            </div>
	        </div>
	    </div>
	</div>

	</body>

	</html>
	import os
	import json
	import jinja2
	import requests
	from bs4 import BeautifulSoup
	from urllib.parse import urljoin


	def main():
	    """Scrape the Naver webtoon weekday page and write an HTML response file.

	    Fetches the weekday listing, keeps every ``list.nhn`` link that contains
	    an element with class ``ico_updt`` (presumably the "updated" badge --
	    confirm against the live markup), renders the collected entries through
	    the ``list.html`` Jinja2 template, and writes a JSON document with
	    ``status``, ``body`` and ``headers`` keys to the file named by the
	    ``res`` environment variable.

	    Raises:
	        requests.RequestException: the listing page could not be fetched
	            within the timeout.
	        KeyError: the ``res`` environment variable is not set.
	        OSError: ``list.html`` could not be read or the response file could
	            not be written.
	    """
	    list_url = 'http://comic.naver.com/webtoon/weekday.nhn'
	    # Bound the wait so a stalled upstream cannot hang the function forever.
	    html = requests.get(list_url, timeout=10).text
	    soup = BeautifulSoup(html, 'html.parser')

	    comic_list = []

	    for a_tag in soup.select('a[href*=list.nhn]'):
	        if not a_tag.select('.ico_updt'):
	            continue

	        img_tag = a_tag.find('img')
	        # Skip links without a usable thumbnail instead of crashing on
	        # markup that differs from what the scraper expects.
	        if img_tag is None or not img_tag.get('src'):
	            continue

	        comic_list.append({
	            'title': img_tag.get('title', ''),
	            'url': urljoin(list_url, a_tag['href']),
	            'img': {
	                'src': img_tag['src'],
	            },
	        })

	    # Read the template through a context manager so the handle is closed.
	    with open('list.html', 'rt', encoding='utf8') as template_file:
	        template_string = template_file.read()

	    # The rendered values come from a third-party page, so treat them as
	    # untrusted and let Jinja2 escape them.
	    template = jinja2.Template(template_string, autoescape=True)
	    body = template.render(comic_list=comic_list)

	    # Response structure: status code, body and headers.
	    return_dict = {
	        "status": 200,
	        "body": body,
	        "headers": {
	            "Content-Type": "text/html"
	        }
	    }

	    # Open the output only after rendering succeeded; opening it earlier
	    # would leave a truncated, empty response file behind on a render error.
	    res_path = os.environ['res']
	    with open(res_path, 'wt', encoding='utf8') as f:
	        f.write(json.dumps(return_dict))


	# Run the scraper only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()