Skip to content

Instantly share code, notes, and snippets.

@seominjoon
Last active February 19, 2018 03:54
Show Gist options
  • Save seominjoon/944cd645f931cb355f0e6169a6da750c to your computer and use it in GitHub Desktop.
Save seominjoon/944cd645f931cb355f0e6169a6da750c to your computer and use it in GitHub Desktop.
네이버 구찌 검색
import urllib.request
import time
import os
# Base URL of a Naver search for the query '구찌' ("Gucci", percent-encoded in
# the `query` parameter). It ends with `start=` so the result offset can be
# appended directly.
url_base = 'https://search.naver.com/search.naver?date_from=&date_option=0&date_to=&dup_remove=1&nso=&post_blogurl=&post_blogurl_without=&query=%EA%B5%AC%EC%B0%8C&sm=tab_pge&srchby=all&st=sim&where=post&start='
# Directory the downloaded pages are written to.
folder_dir = '구찌'
# Number of result pages to download.
N = 100


def page_url(page_num):
    """Return the search URL for the zero-based result page ``page_num``.

    The ``start`` offset appended to ``url_base`` is ``page_num * 10 + 1``,
    i.e. page 0 -> ``start=1``, page 1 -> ``start=11``, and so on
    (presumably 10 results per page -- confirm against the site).
    """
    return url_base + str(page_num * 10 + 1)


def main():
    """Download ``N`` result pages and save each as ``<page_num>.html``.

    Files are written UTF-8 encoded into ``folder_dir``. Network and
    decoding errors are not caught; the first failure aborts the run.
    """
    # exist_ok=True: re-running the script must not fail with
    # FileExistsError just because the output folder is already there.
    os.makedirs(folder_dir, exist_ok=True)
    for page_num in range(N):
        with urllib.request.urlopen(page_url(page_num)) as fp:
            mystr = fp.read().decode("utf8")
        # File names use the zero-based page index; the log line below is
        # one-based.
        with open(os.path.join(folder_dir, '%d.html' % page_num), 'w', encoding='utf8') as fh:
            fh.write(mystr)
        print('page %d saved.' % (page_num + 1))
        # Pause between requests so the server is not hit in a tight loop.
        time.sleep(1)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment