Skip to content

Instantly share code, notes, and snippets.

@naturale0
Created May 6, 2017 14:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save naturale0/ce8fa27f0c8b28cc0c5a63f98a66beab to your computer and use it in GitHub Desktop.
Save naturale0/ce8fa27f0c8b28cc0c5a63f98a66beab to your computer and use it in GitHub Desktop.
네이버 금융에서 일별 종가를 읽어오는 클래스 (http://estenpark.tistory.com/353 참고 - 이걸 그대로 클래스로 구현함)
## Reference: http://estenpark.tistory.com/353 (adapted almost verbatim)
import sys
import time
import urllib
from collections import OrderedDict
from urllib import urlopen

from bs4 import BeautifulSoup
class NaverStockCrawler(object):
    """Crawler that reads daily closing prices for a stock from Naver Finance.

    Adapted from http://estenpark.tistory.com/353.
    """

    def __init__(self, stock_item):
        # stock_item: Naver Finance stock code string, e.g. "005930".
        self.stock_item = stock_item
        self.url = 'http://finance.naver.com/item/sise_day.nhn?code='+ stock_item
        # Maps date string -> closing price (float), in crawl order.
        self.items = OrderedDict()

    def get(self, days=40):
        """Crawl roughly `days` worth of daily closing prices into self.items.

        Each result page holds 10 trading days, so ceil(days / 10) pages are
        fetched, capped at the last page the site advertises.  Progress is
        written to stdout.  Returns self.items for convenience.
        """
        html = urlopen(self.url)
        source = BeautifulSoup(html.read(), "html.parser")
        # The "pgRR" (jump-to-last) link encodes the last page number at the
        # end of its href attribute.
        center_tables = source.find_all("table", align="center")
        last_link = center_tables[0].find_all("td", class_="pgRR")
        last_page = int(last_link[0].a.get('href')[-3:])

        pages = days // 10 if days % 10 == 0 else days // 10 + 1
        # Bug fix: the original computed the last page number but never used
        # it, so asking for more days than exist re-fetched the final page.
        pages = min(pages, last_page)

        for page in range(1, pages + 1):
            sys.stdout.write("\r* crawling page " + str(page) + "...")
            sys.stdout.flush()
            page_url = self.url + '&page=' + str(page)
            html = urlopen(page_url)
            source = BeautifulSoup(html.read(), "html.parser")
            rows = source.find_all("tr")
            # Be polite to the server: pause between page requests.  (The
            # original guarded this with `page % 1 == 0`, which is always
            # true, so the effective behavior — sleep every page — is kept.)
            time.sleep(1.40)
            for i in range(1, len(rows) - 1):
                # Rows without a <span> are header/separator rows; skip them.
                if rows[i].span is not None:
                    date = rows[i].find_all("td", align="center")[0].text
                    price = rows[i].find_all("td", class_="num")[0].text
                    self.items[date] = float(price.replace(",", ""))
        return self.items
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment