Skip to content

Instantly share code, notes, and snippets.

@yevgnenll
Created March 22, 2016 15:30
Show Gist options
  • Save yevgnenll/59f4706f3f00c63b10e0 to your computer and use it in GitHub Desktop.
Save yevgnenll/59f4706f3f00c63b10e0 to your computer and use it in GitHub Desktop.
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
class NaverCrawling:
    """Collect post links and titles from Naver blog-search result pages.

    The search URL template is stored on the instance as ``naver_blog`` and
    is filled in with a percent-encoded keyword and a 1-based result offset.
    """

    # Offset step between consecutive result pages (page 1 -> start=1,
    # page 2 -> start=11, ...).
    RESULTS_PER_PAGE = 10

    # Seconds to wait on the HTTP request; without a timeout ``requests``
    # can block forever on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.naver_blog = "https://search.naver.com/search.naver?where=post&ie=utf8&query={query}&start={start}"

    def _build_url(self, search_keyword, page):
        """Return the search URL for *search_keyword* on 1-based *page*."""
        start = (page - 1) * self.RESULTS_PER_PAGE + 1
        return self.naver_blog.format(
            # Escape everything (safe="") so characters such as '&', '#',
            # '+' or spaces in the keyword cannot alter the query string.
            query=quote(str(search_keyword), safe=""),
            start=start,
        )

    def naver_blog_page(self, search_keyword, page):
        """Fetch one result page and extract the blog post links and titles.

        Args:
            search_keyword: Text to search for.
            page: 1-based result page number.

        Returns:
            A dict with two parallel lists: ``'link'`` (the ``href`` of each
            matched title anchor) and ``'title'`` (its ``title`` attribute).
            Either value may be ``None`` when the attribute is absent.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails
                or exceeds ``REQUEST_TIMEOUT``.
        """
        find_url = self._build_url(search_keyword, page)
        html_doc = requests.get(find_url, timeout=self.REQUEST_TIMEOUT).content
        dom = BeautifulSoup(html_doc, "html.parser")

        link_list = []
        title_list = []
        # NOTE(review): these CSS selectors depend on Naver's result markup;
        # confirm they still match the live page.
        for blog_post_element in dom.select('li.sh_blog_top'):
            anchor = blog_post_element.select_one('a.sh_blog_title')
            if anchor is None:
                # A result item without a title anchor has nothing to
                # report; skip it instead of failing on ``None.attrs``.
                continue
            link_list.append(anchor.attrs.get('href'))
            title_list.append(anchor.attrs.get('title'))

        return {
            'link': link_list,
            'title': title_list,
        }

    def naver_blog_page10(self, search_keyword, pages=10):
        """Fetch result pages 1 through *pages* (10 by default).

        Args:
            search_keyword: Text to search for.
            pages: Number of result pages to fetch, starting from page 1.

        Returns:
            A dict with keys ``'link'`` and ``'title'``. Each value is a
            list holding one sub-list per fetched page, i.e. the results
            are grouped by page rather than flattened.
        """
        link_list = []
        title_list = []
        for page in range(1, pages + 1):
            ret_val = self.naver_blog_page(search_keyword, page)
            # append (not extend): keeps the per-page grouping callers get.
            link_list.append(ret_val.get('link'))
            title_list.append(ret_val.get('title'))

        return {
            'link': link_list,
            'title': title_list,
        }
# Script-style smoke run: request the first result page for the Korean
# keyword below; the returned dict is not used.
a = NaverCrawling()
a.naver_blog_page(search_keyword="커피", page=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment