Skip to content

Instantly share code, notes, and snippets.

@jennyonjourney
Last active April 1, 2018 03:22
Show Gist options
  • Save jennyonjourney/804b2c2aaa17722d00572e26c0a6b903 to your computer and use it in GitHub Desktop.
Save jennyonjourney/804b2c2aaa17722d00572e26c0a6b903 to your computer and use it in GitHub Desktop.
Python - web data crawling
import requests
from lxml.html import parse
from io import StringIO
# Fetch the Naver Finance market-cap listing page and extract its table rows.
url='http://finance.naver.com/sise/sise_market_sum.nhn'
text = requests.get(url)  # HTTP GET; the response body is the page HTML
ppp = parse(StringIO(text.text))  # parse the HTML text with lxml
doc = ppp.getroot()  # root element of the parsed document
tables = doc.findall('.//table')
#print(tables) ### all <table> elements on the page
tt = tables[1] ### the 2nd table — presumably the stock listing; confirm against the page
rows = tt.findall('.//tr') ### all <tr> rows of that table
def rowData(rr, kind):
    """Collect the text of every *kind* cell (e.g. 'td' or 'th') under row *rr*.

    Tab and newline characters are removed from each cell's text so the
    values are safe to place on a single CSV line.
    """
    cells = rr.findall('.//'+kind)
    texts = []
    for cell in cells:
        cleaned = cell.text_content().replace("\t","").replace("\n","")
        texts.append(cleaned)
    return texts
def rowrite(rr):
    """Placeholder row formatter: ignores *rr* and returns a fixed practice line."""
    return '연습삼아\n'
# Practice run: res only needs to concatenate the cell texts and emit them.
# `with` guarantees the file is closed even if parsing raises mid-loop
# (the original open()/close() pair leaked the handle on an exception).
with open('../fff/webData.csv', 'w', encoding='utf-8') as f:
    for row in rows[2:]:  # skip the header rows at the top of the table
        rr = rowData(row, 'td')
        if len(rr) > 2:  # keep only real data rows, not spacer/empty rows
            print(rr)
            f.write(rowrite(rr))
print('-------이제 본격적으로 증권정보를 csv로 저장하자--------')
# res only needs to concatenate each row's cell texts into one CSV line
def rowWrite(rr):
    """Join one row's cell texts into a single CSV line.

    Args:
        rr: list of cell strings for one table row.

    Returns:
        The values joined by commas and terminated with a newline;
        an empty list yields just '\\n' (same as the original loop).
    """
    # str.join places commas only between items — exactly what the original
    # hand-rolled counter loop did, minus the quadratic string concatenation.
    return ','.join(rr) + '\n'
# Write the real CSV: one comma-separated line per data row.
# `with` guarantees the file is closed even if an exception occurs mid-loop
# (the original open()/close() pair leaked the handle on an exception).
with open('../fff/webData.csv', 'w', encoding='utf-8') as f:
    for row in rows[2:]:  # skip the header rows at the top of the table
        rr = rowData(row, 'td')
        if len(rr) > 2:  # keep only real data rows, not spacer/empty rows
            print(rr)
            f.write(rowWrite(rr))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment