Skip to content

Instantly share code, notes, and snippets.

@yxy
Created October 11, 2018 04:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yxy/ef05a02ea475503625c915eaf4789ad4 to your computer and use it in GitHub Desktop.
Save yxy/ef05a02ea475503625c915eaf4789ad4 to your computer and use it in GitHub Desktop.
反序列化数据列,支持字符,数组,字典等复杂结构,
"""
导出数据
~~~~~~~~~~~~~
反序列化数据列,支持字符,数组,字典等复杂结构,
TODO:
- 数据类型标注: string,bool,int,float
- 默认值
- 必须值
"""
def read_char(s, pos):
    """
    Read the run of alphabetic characters of ``s`` that starts at ``pos``.

    :param s: string being scanned
    :param pos: index at which scanning starts
    :return: tuple ``(letters, pos)`` where ``letters`` is the run that was
        read (possibly empty) and ``pos`` is the index of the first
        character that was not consumed
    """
    size = len(s)
    letters = []
    while pos < size:
        ch = s[pos]
        if not ch.isalpha():
            break
        letters.append(ch)
        pos += 1
    return "".join(letters), pos
def read_next(s, pos):
    """
    Peek at the character that follows index ``pos`` in ``s``.

    :param s: string being scanned
    :param pos: index of the current character
    :return: the character at ``pos + 1``, or None when there is no such
        character
    """
    # The bound must be checked against the index that is actually read
    # (pos + 1); checking pos alone raises IndexError on the last character.
    if pos + 1 < len(s):
        return s[pos + 1]
    return None
def read_until(s, pos, t):
    """
    Collect the characters of ``s`` from ``pos`` up to, but not including,
    the first occurrence of the terminator.

    :param s: string being scanned
    :param pos: index at which scanning starts
    :param t: terminating character
    :return: tuple ``(text, pos)`` where ``text`` is what was collected and
        ``pos`` is the index of the terminator, or the index at which
        scanning stopped when the terminator was not found
    """
    chunk = ""
    for cursor in range(pos, len(s)):
        if s[cursor] == t:
            return chunk, cursor
        chunk += s[cursor]
    # No terminator: scanning stopped at the end of the string, or never
    # started because pos was already past the end.
    return chunk, max(pos, len(s))
def parse(s, value, data, pos=0):
    """
    Store ``value`` inside ``data`` at the location described by path ``s``.

    The path is read from index ``pos``. A name is read with ``read_char``
    and the character that follows it selects the form:

    - ``name``         -> ``data['name'] = value``
    - ``name.rest``    -> ``rest`` is parsed into the dict kept under ``name``
    - ``name[]``       -> ``value`` is split on ``;`` and stored as a list
    - ``name[i]``      -> ``value`` is stored at index ``i`` of the list kept
      under ``name`` (the list is padded with None up to that index)
    - ``name[i].rest`` -> ``rest`` is parsed into the dict at index ``i``

    :param s: path string, e.g. ``'name[1].first'``
    :param value: value to store; must be a string for the ``name[]`` form
    :param data: dict that receives the value; it is modified in place
    :param pos: index in ``s`` at which parsing starts
    :return: ``data``
    :raises ValueError: when the text between ``[`` and ``]`` is not an
        integer
    """
    token, pos = read_char(s, pos)
    if pos == len(s):
        # The name is the last path component: plain assignment.
        data[token] = value
        return data

    c = s[pos]  # character that follows the name
    if c == '.':
        # Descend into the dict already stored under this name so that
        # sibling paths ('name.first', 'name.last') accumulate instead of
        # replacing each other; anything that is not a dict is replaced.
        child = data.get(token)
        if not isinstance(child, dict):
            child = {}
        data[token] = parse(s, value, data=child, pos=pos + 1)
        return data

    if c == '[':
        data.setdefault(token, [])
        num, pos = read_until(s, pos=pos + 1, t="]")
        if not num:
            # 'name[]': the whole list comes from a ';'-separated value.
            data[token] = value.split(';')
            return data

        index = int(num)
        items = data[token]
        # Pad with None so that items[index] exists.
        items.extend([None] * (index + 1 - len(items)))

        c = read_next(s, pos)  # character after the closing bracket
        if c is None:
            items[index] = value
        elif c == '.':
            # Reuse the element already at this index when it is set.
            element = items[index] or {}
            items[index] = parse(s, value, data=element, pos=pos + 2)
        return data

    # Any other character after the name is not handled: nothing is stored.
    return data
# Import-time self-checks for parse(): every case is
# (path, raw value, expected structure) and starts from an empty dict.
assert all(
    parse(path, raw, data={}) == expected
    for path, raw, expected in (
        ('name', 'yxy', {'name': 'yxy'}),
        ('name.first', 'yxy', {'name': {'first': 'yxy'}}),
        ('name[]', 'yxy;xx;xx', {'name': ['yxy', 'xx', 'xx']}),
        ('name[1].first', 'yxy', {'name': [None, {'first': 'yxy'}]}),
        ('name[0].name', 'yxy', {'name': [{'name': 'yxy'}]}),
        # Same case as two entries above, checked a second time.
        ('name[1].first', 'yxy', {'name': [None, {'first': 'yxy'}]}),
        ('name[1].first[]', '1;2;3', {'name': [None, {'first': ['1', '2', '3']}]}),
    )
)
def main():
    """
    Read ``data.csv`` and pretty-print its rows as nested structures.

    Every row becomes one dict: each column header is handed to ``parse``
    as a path and the cell as the value, all columns of a row sharing the
    same dict.
    """
    from pprint import pprint
    from csv import DictReader

    # The csv module expects files opened with newline='' so that newlines
    # embedded in quoted fields are interpreted correctly.
    with open('data.csv', newline='') as fb:
        dataset = []
        for kv in DictReader(fb):
            data = {}
            for k, v in kv.items():
                parse(k, v, data=data, pos=0)
            dataset.append(data)
    pprint(dataset)


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment