# Source: GitHub Gist by @kujyp, created July 1, 2019 12:48.
# Gist: kujyp/b9e6126d632da809d2979a2dd6d9d553
import requests
def get_header_names(decoded):
    """Return the text of every ``<th>`` cell in the first ``<tr>`` row.

    Only the span between the first literal ``<tr>`` and the ``</tr>`` that
    follows it is scanned.  ``<th>`` opening tags may carry attributes; the
    cell text is returned verbatim (not stripped, not entity-unescaped).

    Args:
        decoded: HTML markup as a ``str``.

    Returns:
        A list of header strings in document order.  The list is empty when
        there is no ``<tr>...</tr>`` row or the row contains no complete
        ``<th>...</th>`` cell.
    """
    row_open, row_close = "<tr>", "</tr>"
    cell_close = "</th>"

    row_start = decoded.find(row_open)
    if row_start == -1:
        return []
    row_start += len(row_open)
    # Look for the closing tag only after the opening one, so an earlier
    # stray "</tr>" cannot produce an empty or inverted slice.
    row_end = decoded.find(row_close, row_start)
    if row_end == -1:
        return []

    names = []
    pos = row_start
    while True:
        # Every search is bounded to [pos, row_end) so cells from later rows
        # are never picked up and the string is never re-sliced.
        cell_start = decoded.find("<th", pos, row_end)
        if cell_start == -1:
            break
        tag_end = decoded.find(">", cell_start, row_end)
        if tag_end == -1:
            break
        text_start = tag_end + 1
        text_end = decoded.find(cell_close, text_start, row_end)
        if text_end == -1:
            # Unterminated cell: stop instead of emitting a truncated value.
            break
        names.append(decoded[text_start:text_end])
        pos = text_end + len(cell_close)
    return names
def get_data_as_lst(decoded):
    """Return the ``<td>`` cell texts of every data row found in *decoded*.

    Each ``<tr>...</tr>`` span is scanned for ``<td>`` cells (opening tags may
    carry attributes).  Rows without any ``<td>`` cell, such as a header row
    made of ``<th>`` cells, are skipped.  Empty cells are kept as ``""`` so
    that positions inside a row stay stable.

    NOTE(review): presumably the cells of each row appear in the same column
    order as the table's header cells; confirm against the real download.

    Args:
        decoded: HTML markup as a ``str``.

    Returns:
        A list of rows; each row is a list of cell strings with surrounding
        whitespace stripped.  Empty when no data row is found.
    """
    row_open, row_close = "<tr>", "</tr>"
    cell_close = "</td>"

    rows = []
    pos = 0
    while True:
        row_start = decoded.find(row_open, pos)
        if row_start == -1:
            break
        row_start += len(row_open)
        row_end = decoded.find(row_close, row_start)
        if row_end == -1:
            # Unterminated row: nothing reliable left to parse.
            break

        cells = []
        cell_pos = row_start
        while True:
            # Searches are bounded to the current row so a malformed cell
            # cannot swallow content from the following rows.
            cell_start = decoded.find("<td", cell_pos, row_end)
            if cell_start == -1:
                break
            tag_end = decoded.find(">", cell_start, row_end)
            if tag_end == -1:
                break
            text_start = tag_end + 1
            text_end = decoded.find(cell_close, text_start, row_end)
            if text_end == -1:
                break
            cells.append(decoded[text_start:text_end].strip())
            cell_pos = text_end + len(cell_close)

        if cells:
            rows.append(cells)
        pos = row_end + len(row_close)
    return rows
if __name__ == '__main__':
    # Download the KRX corporation list and look up the name/code columns.
    # Adjacent literals are concatenated by the parser; a backslash
    # continuation inside the string would pull indentation into the URL.
    url = (
        "http://kind.krx.co.kr/corpgeneral/corpList.do?"
        "method=download&searchType=13"
    )
    # requests waits indefinitely unless a timeout is given.
    res = requests.get(url, timeout=30)
    # Fail fast on an HTTP error instead of parsing an error page.
    res.raise_for_status()
    decoded = res.content.decode('euc-kr')

    header_names = get_header_names(decoded)
    # list.index raises ValueError if an expected column is missing.
    corpname_idx = header_names.index("회사명")
    corpcode_idx = header_names.index("종목코드")

    data = get_data_as_lst(decoded)
    for eachline in data:
        code = eachline[corpcode_idx]
        name = eachline[corpname_idx]
        # NOTE(review): code and name are extracted but not used yet.
# (GitHub page footer: sign up or sign in to comment on this gist.)