Created July 1, 2019 12:48
-
-
Save kujyp/b9e6126d632da809d2979a2dd6d9d553 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
def get_header_names(decoded):
    """Return the inner text of each ``<th>`` cell in the first ``<tr>`` row.

    Args:
        decoded: HTML document as a string.

    Returns:
        A list with the raw (unstripped) text found between each
        ``<th ...>`` opening tag and its matching ``</th>`` inside the first
        literal ``<tr>`` ... ``</tr>`` span.  An empty list is returned when
        the document has no ``<tr>`` tag.  If the row is never closed with
        ``</tr>``, the rest of the document is scanned.  Scanning stops at
        the first ``<th`` cell that is malformed (no ``>`` or no ``</th>``).
    """
    row_open = "<tr>"
    cell_close = "</th>"

    row_start = decoded.find(row_open)
    if row_start == -1:
        return []
    row_start += len(row_open)

    # Look for the closing tag only *after* the opening one, so a stray
    # "</tr>" earlier in the document cannot produce an empty row.
    row_end = decoded.find("</tr>", row_start)
    row = decoded[row_start:] if row_end == -1 else decoded[row_start:row_end]

    names = []
    pos = 0
    while True:
        cell_start = row.find("<th", pos)
        if cell_start == -1:
            break
        # The opening tag may carry attributes; skip to its closing ">".
        tag_end = row.find(">", cell_start)
        if tag_end == -1:
            break
        text_start = tag_end + 1
        text_end = row.find(cell_close, text_start)
        if text_end == -1:
            # Unterminated cell: stop rather than slicing with -1.
            break
        names.append(row[text_start:text_end])
        pos = text_end + len(cell_close)
    return names
def get_data_as_lst(decoded):
    """Print a preview of *decoded* and return fixed sample rows.

    The first 2000 characters of ``decoded`` are written to stdout.  The
    return value does not depend on ``decoded``: it is always the same two
    hard-coded rows of three strings each.
    NOTE(review): this looks like a placeholder for a real table parser --
    confirm before relying on the returned rows.
    """
    preview = decoded[:2000]
    print(preview)

    rows = []
    rows.append(["GS글로벌", "001250", "상품 종합 도매업"])
    rows.append(["HSD엔진", "001250", "상품 종합 도매업"])
    return rows
if __name__ == '__main__':
    # Script entry point: download the corporation list page, read its
    # header row, and pick the company-name / stock-code columns out of
    # each data row.

    # Adjacent string literals are joined at compile time; unlike a
    # backslash continuation inside the literal, indentation of the second
    # line cannot leak into the URL.
    url = (
        "http://kind.krx.co.kr/corpgeneral/corpList.do?"
        "method=download&searchType=13"
    )

    # Without a timeout requests may wait on the server indefinitely.
    res = requests.get(url, timeout=30)
    # Fail here on an HTTP error instead of decoding an error page and
    # hitting a confusing ValueError from .index() below.
    res.raise_for_status()

    decoded = res.content.decode('euc-kr')
    header_names = get_header_names(decoded)

    # list.index raises ValueError if the expected column header is absent.
    corpname_idx = header_names.index("회사명")
    corpcode_idx = header_names.index("종목코드")

    data = get_data_as_lst(decoded)
    for eachline in data:
        # NOTE(review): code and name are extracted but not used further
        # in the visible script -- presumably consumed by later work.
        code = eachline[corpcode_idx]
        name = eachline[corpname_idx]
Sign up for free to join this conversation on GitHub.
Already have an account?
Sign in to comment