Created
April 7, 2014 09:54
-
-
Save allieus/10017528 to your computer and use it in GitHub Desktop.
중첩된 CSV 데이터 파싱
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
def parse(bin):
    """Parse text made of several CSV tables, each introduced by a '*' line.

    The input is a sequence of blocks.  Each block consists of:

    1. a header line starting with ``*`` (e.g. ``*** label:NAME ***``);
       the block name is whatever follows the last ``:`` once the
       surrounding asterisks and whitespace are removed,
    2. one line of comma-separated field names,
    3. zero or more lines of comma-separated values.

    Blank lines are ignored.  Non-blank lines that appear before the first
    ``*`` header are grouped into a block of their own, whose first line is
    then treated as the header line.

    Args:
        bin: The text to parse.  (The name shadows the ``bin`` builtin; it
            is kept so existing keyword callers keep working.)

    Returns:
        A list of ``{'name': <str>, 'rows': [<dict>, ...]}`` dictionaries in
        input order.  Each row maps field name to the raw string value; no
        type conversion is done.  A block that has no field-name line yields
        an empty ``rows`` list.
    """
    # Pass 1: split the text into raw blocks, opening a new block at every
    # line that starts with '*'.
    raw_blocks = []
    current = []
    for line in bin.splitlines():
        line = line.strip()
        if not line:
            # A blank line would otherwise become a bogus one-field row.
            continue
        if line.startswith('*'):
            if current:
                raw_blocks.append(current)
            current = [line]
        else:
            current.append(line)
    if current:
        raw_blocks.append(current)

    # Pass 2: turn each raw block into a name plus a list of row dicts.
    blocks = []
    for raw in raw_blocks:
        name = raw[0].strip('*').strip().split(':')[-1]
        # A header-only block has no field-name line; treat it as empty
        # instead of failing with IndexError.
        field_names = raw[1].split(',') if len(raw) > 1 else []
        # zip() stops at the shorter sequence, so surplus values (or
        # surplus field names) on a line are silently dropped.
        rows = [dict(zip(field_names, data.split(','))) for data in raw[2:]]
        blocks.append({
            'name': name,
            'rows': rows,
        })
    return blocks
if __name__ == '__main__':
    # Demo: parse a two-block sample and print, for every block, its name,
    # each row's u'필드명' value on its own line, and then all of those
    # values as one list.
    #
    # print(...) is called with a single argument throughout so the script
    # produces the same output under the Python 2 print statement and the
    # Python 3 print function.
    sample_text = u"""************************** 레코드명:t8430InBlock **************************
No,한글명,필드명,영문명,레코드타입,데이터사이즈,옵셋,소수점
0,구분(0:전체1:코스피2:코스닥),gubun,gubun,1,1,0,0
************************** 레코드명:t8430OutBlock **************************
No,한글명,필드명,영문명,레코드타입,데이터사이즈,옵셋,소수점
0,종목명,hname,hname,1,20,0,0
1,단축코드,shcode,shcode,1,6,20,0
2,확장코드,expcode,expcode,1,12,26,0
3,ETF구분(1:ETF),etfgubun,etfgubun,1,1,38,0
4,상한가,uplmtprice,uplmtprice,2,8,39,0
5,하한가,dnlmtprice,dnlmtprice,2,8,47,0
6,전일가,jnilclose,jnilclose,2,8,55,0
7,주문수량단위,memedan,memedan,1,5,63,0
8,기준가,recprice,recprice,2,8,68,0
9,구분(1:코스피2:코스닥),gubun,gubun,1,1,76,0"""

    for parsed_block in parse(sample_text):
        print(parsed_block['name'])
        for row in parsed_block['rows']:
            print(row[u'필드명'])
        print([row[u'필드명'] for row in parsed_block['rows']])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment