kujyp/scrap_corpcode.py

## scrap_corpcode.py
import requests


def get_header_names(decoded):
    """Return the inner text of each ``<th>`` cell in the first table row.

    The first literal ``<tr>`` ... ``</tr>`` span of *decoded* is located
    with plain string searches (no HTML parser is involved), and the text
    between every ``<th ...>`` opening tag and the ``</th>`` that follows it
    is collected in document order.

    Args:
        decoded: HTML markup as a ``str``.

    Returns:
        A list with the raw contents of each header cell (no whitespace
        stripping, no entity decoding). The list is empty when *decoded*
        has no literal ``<tr>`` tag or the row holds no complete
        ``<th>...</th>`` cell.
    """
    row_open = "<tr>"
    cell_close = "</th>"

    row_start = decoded.find(row_open)
    if row_start == -1:
        # str.find signals "not found" with -1; never feed that into a slice.
        return []
    row_start += len(row_open)

    row_end = decoded.find("</tr>", row_start)
    if row_end == -1:
        # Unterminated row: scan to the end of the markup.
        row_end = len(decoded)
    row = decoded[row_start:row_end]

    names = []
    pos = 0
    while True:
        tag_start = row.find("<th", pos)
        if tag_start == -1:
            break
        # The opening tag may carry attributes, so look for its closing '>'.
        tag_end = row.find(">", tag_start)
        if tag_end == -1:
            break
        content_start = tag_end + 1
        content_end = row.find(cell_close, content_start)
        if content_end == -1:
            # A cell without its closing tag is incomplete; stop here rather
            # than emit a truncated or empty name.
            break
        names.append(row[content_start:content_end])
        pos = content_end + len(cell_close)

    return names


def get_data_as_lst(decoded):
    """Print a preview of *decoded* and return fixed sample rows.

    The first 2000 characters of *decoded* are written to standard output.
    *decoded* is not parsed: the returned rows are hard-coded.

    Args:
        decoded: Text to preview; only sliced and printed.

    Returns:
        A new list of two three-element lists of strings.
    """
    preview = decoded[:2000]
    print(preview)

    # NOTE(review): both rows carry the same code and category; this looks
    # like placeholder data standing in for a real parser - confirm.
    sample_rows = []
    for corp_name in ("GS글로벌", "HSD엔진"):
        sample_rows.append([corp_name, "001250", "상품 종합 도매업"])
    return sample_rows


if __name__ == '__main__':
    # Adjacent string literals are joined at compile time, so the URL cannot
    # pick up stray whitespace from how a continuation line is indented.
    url = (
        "http://kind.krx.co.kr/corpgeneral/corpList.do?"
        "method=download&searchType=13"
    )
    # Without a timeout, requests waits indefinitely on a silent server.
    res = requests.get(url, timeout=30)
    # Raise on an HTTP error status instead of parsing an error page.
    res.raise_for_status()
    decoded = res.content.decode('euc-kr')

    header_names = get_header_names(decoded)
    # list.index raises ValueError when a column header is absent.
    corpname_idx = header_names.index("회사명")
    corpcode_idx = header_names.index("종목코드")

    data = get_data_as_lst(decoded)
    for eachline in data:
        # NOTE(review): the values are picked out of each row but not used
        # any further in this script yet.
        code = eachline[corpcode_idx]
        name = eachline[corpname_idx]
	import requests


	def get_header_names(decoded):
	    """Return the inner text of each ``<th>`` cell in the first table row.

	    The first literal ``<tr>`` ... ``</tr>`` span of *decoded* is located
	    with plain string searches (no HTML parser is involved), and the text
	    between every ``<th ...>`` opening tag and the ``</th>`` that follows
	    it is collected in document order.

	    Args:
	        decoded: HTML markup as a ``str``.

	    Returns:
	        A list with the raw contents of each header cell (no whitespace
	        stripping, no entity decoding). The list is empty when *decoded*
	        has no literal ``<tr>`` tag or the row holds no complete
	        ``<th>...</th>`` cell.
	    """
	    row_open = "<tr>"
	    cell_close = "</th>"

	    row_start = decoded.find(row_open)
	    if row_start == -1:
	        # str.find signals "not found" with -1; never slice with it.
	        return []
	    row_start += len(row_open)

	    row_end = decoded.find("</tr>", row_start)
	    if row_end == -1:
	        # Unterminated row: scan to the end of the markup.
	        row_end = len(decoded)
	    row = decoded[row_start:row_end]

	    names = []
	    pos = 0
	    while True:
	        tag_start = row.find("<th", pos)
	        if tag_start == -1:
	            break
	        # The opening tag may carry attributes; find its closing '>'.
	        tag_end = row.find(">", tag_start)
	        if tag_end == -1:
	            break
	        content_start = tag_end + 1
	        content_end = row.find(cell_close, content_start)
	        if content_end == -1:
	            # A cell without its closing tag is incomplete; stop here
	            # rather than emit a truncated or empty name.
	            break
	        names.append(row[content_start:content_end])
	        pos = content_end + len(cell_close)

	    return names


	def get_data_as_lst(decoded):
	    """Print a preview of *decoded* and return fixed sample rows.

	    The first 2000 characters of *decoded* are written to standard
	    output. *decoded* is not parsed: the returned rows are hard-coded.

	    Args:
	        decoded: Text to preview; only sliced and printed.

	    Returns:
	        A new list of two three-element lists of strings.
	    """
	    print(decoded[:2000])
	    # NOTE(review): both rows carry the same code and category; this
	    # looks like placeholder data standing in for a real parser - confirm.
	    return [
	        ["GS글로벌", "001250", "상품 종합 도매업"],
	        ["HSD엔진", "001250", "상품 종합 도매업"],
	    ]


	if __name__ == '__main__':
	    # Adjacent string literals are joined at compile time, so the URL
	    # cannot pick up whitespace from a continuation line's indentation.
	    url = (
	        "http://kind.krx.co.kr/corpgeneral/corpList.do?"
	        "method=download&searchType=13"
	    )
	    # Without a timeout, requests waits indefinitely on a silent server.
	    res = requests.get(url, timeout=30)
	    # Raise on an HTTP error status instead of parsing an error page.
	    res.raise_for_status()
	    decoded = res.content.decode('euc-kr')

	    header_names = get_header_names(decoded)
	    # list.index raises ValueError when a column header is absent.
	    corpname_idx = header_names.index("회사명")
	    corpcode_idx = header_names.index("종목코드")

	    data = get_data_as_lst(decoded)
	    for eachline in data:
	        # NOTE(review): the values are picked out of each row but not
	        # used any further in this script yet.
	        code = eachline[corpcode_idx]
	        name = eachline[corpname_idx]