Python - Web crawling (saving the results to a CSV file)
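The script below crawls the first 29 pages of Naver Finance's market-capitalization listing (sise_market_sum.nhn), extracts each data row from the second HTML table on the page with lxml, and writes the rows to ../fff/webData.csv. Note that the fff directory must already exist, since open() does not create parent directories.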
import requests
from lxml.html import parse
from io import StringIO

def rowData(rr, kind):
    # Collect the text of every cell of the given kind ('td' or 'th') in a
    # row, stripping tabs and newlines; the last (empty) column is dropped.
    cols = rr.findall('.//' + kind)
    res = [vv.text_content().replace("\t", "").replace("\n", "") for vv in cols]
    return res[:-1]

def rowWrite(rr):
    # Join the cell values with commas and terminate the record with '\n'.
    res = ''
    cnt = 0
    for i in rr:
        cnt += 1
        res += i
        if len(rr) > cnt:
            res += ','
    res += '\n'
    return res

f = open('../fff/webData.csv', 'w', encoding='utf-8')
for i in range(1, 30):
    url = 'http://finance.naver.com/sise/sise_market_sum.nhn?page=%d' % i
    text = requests.get(url)
    ppp = parse(StringIO(text.text))
    doc = ppp.getroot()
    tables = doc.findall('.//table')
    # print(tables)             # all tables on the page
    tt = tables[1]              # the listing is the second table (index 1)
    rows = tt.findall('.//tr')  # its rows
    for row in rows[2:]:        # skip the header rows
        rr = rowData(row, 'td')
        if len(rr) > 2:         # skip separator rows that carry no data
            print(rr)
            f.write(rowWrite(rr))
f.close()
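
One caveat with the hand-rolled rowWrite: it never quotes cell values, so any cell that itself contains a comma (prices on this page are typically formatted with thousands separators, e.g. 1,234) will spill into extra columns. Below is a minimal sketch of the same crawl using the standard library's csv module, which quotes such cells automatically. row_cells is a hypothetical stand-in for rowData above; everything else follows the original script.

import csv
import requests
from io import StringIO
from lxml.html import parse

def row_cells(tr):
    # Same extraction as rowData above: all <td> texts with tabs and
    # newlines stripped, last (empty) column dropped.
    tds = tr.findall('.//td')
    return [td.text_content().replace('\t', '').replace('\n', '') for td in tds][:-1]

# newline='' stops the csv module from doubling line endings on Windows.
with open('../fff/webData.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for page in range(1, 30):
        url = 'http://finance.naver.com/sise/sise_market_sum.nhn?page=%d' % page
        doc = parse(StringIO(requests.get(url).text)).getroot()
        rows = doc.findall('.//table')[1].findall('.//tr')
        for row in rows[2:]:
            cells = row_cells(row)
            if len(cells) > 2:
                # csv.writer quotes any cell containing a comma,
                # so values like "71,000" stay in a single column.
                writer.writerow(cells)

As with the original, this assumes the page layout (the listing in table index 1, two header rows) has not changed since the gist was written.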