Existing ways to do this:
- Are not type-safe (DRF)
- Require the JSON and the deserialized object to have the same structure (dataclasses_json)
payload:
{
"date": "2020-12-12",
"type": "membership:statuschange",
"number_as_str": "2",
"payload": {
"from": "CURRENT",
"to": "EXPIRED",
"who": [
{ "name": "Fred Whelks", "id": 102 },
{ "name": "Fisher Carrie", "id": 1334 }
]
},
"from": "Hub <bef34acb948f>"
}
python:
import re
from dataclasses import dataclass, field
from datetime import date
from enum import Enum, auto
from typing import (
    Any,
    Callable,
    List,
    Optional,
    Tuple,
    Type,
    TypeVar,
    Union,
    get_type_hints,
)
# Generic type variable used in the signatures of Pluck.pluck and parse_into
# to link the type that is requested to the value that is returned.
T = TypeVar('T')
@dataclass
class PathToken:
    """Token for one named path segment.

    ``name`` holds the characters the tokeniser captured for the segment.
    """

    name: str


@dataclass
class ArrayToken:
    """Token emitted for a ``[]`` marker in a path; it carries no data."""


# Everything the tokeniser can emit.
Token = Union[PathToken, ArrayToken]
class StartState:
    """Initial tokeniser state: the leading ``.`` has not been read yet."""


class SeparatorState:
    """Separator state.

    NOTE(review): ``_process_char`` never produces or handles this state in
    the code visible here; it is kept because it is a member of ``State``.
    """


@dataclass
class NameState:
    """The tokeniser is reading the characters of a segment name.

    This has to be a dataclass: the tokeniser builds it with
    ``NameState(first=...)``, which a plain class with bare annotations
    rejects with ``TypeError``.
    """

    # True only for the segment that directly follows the leading dot.
    first: bool
    # Characters of the segment name read so far.
    captured: str = ""


class ArrayStartState:
    """A ``[`` has been read; the next character must be ``]``."""


class ArrayEndState:
    """A ``]`` has been read; only end of input or ``.`` may follow."""


class FinishedState:
    """The end of the path has been reached."""


State = Union[
    StartState,
    SeparatorState,
    NameState,
    ArrayStartState,
    ArrayEndState,
    FinishedState,
]
# Matches a single character that may appear in a path segment name: an ASCII
# letter, an underscore or a hyphen (digits are not accepted).
NAME = re.compile(r"[a-zA-Z_-]")
class TokeniserError(ValueError):
    """Raised when a path string cannot be tokenised.

    ``str()`` of the error is the bare message until ``context`` is set;
    afterwards it also shows the path with a caret under the offending index.
    """

    # Human-readable description of the problem.
    message: str
    # ``(path, index)`` locating the problem; assigned after construction by
    # whoever knows the position.
    context: Optional[Tuple[str, int]] = None

    def __init__(self, message: str) -> None:
        # Forward the message to ValueError so that ``args``, ``repr()`` and
        # pickling carry it instead of an empty tuple.
        super().__init__(message)
        self.message = message

    def __str__(self) -> str:
        if self.context is None:
            return self.message
        path, idx = self.context
        # Third line: a caret aligned under the character at ``idx``.
        return f"{self.message} at index {idx}:\n{path}\n{' ' * idx}^"
def _process_char(state: State, ch: Optional[str]) -> Tuple[State, Optional[Token]]:
    """Advance the path tokeniser by one character.

    Args:
        state: The current tokeniser state.
        ch: The next character of the path, or ``None`` for end of input.

    Returns:
        A ``(next_state, token)`` pair. ``token`` is ``None`` when this
        character does not complete a token.

    Raises:
        TokeniserError: If ``ch`` is not acceptable in ``state``.
    """
    if isinstance(state, StartState):
        if ch != ".":
            raise TokeniserError("All paths should start with the dot")
        return NameState(first=True), None

    if isinstance(state, NameState):
        if ch is not None and NAME.match(ch):
            state.captured += ch
            return state, None

        if ch is None:
            if state.captured:
                # End of input closes the segment being read.
                return FinishedState(), PathToken(state.captured)
            if state.first:
                # We accept paths like "."
                return FinishedState(), None
            raise TokeniserError("Trailing dot at the end of a path")

        if ch == "[":
            if state.captured:
                return ArrayStartState(), PathToken(state.captured)
            if state.first:
                # ".[]": the array marker applies to the root, so there is
                # no name to emit.
                return ArrayStartState(), None
            raise TokeniserError("Empty path segment")

        if ch == ".":
            if not state.captured:
                # Reject "..": never emit a PathToken with an empty name.
                raise TokeniserError("Empty path segment")
            return NameState(first=False), PathToken(state.captured)

        raise TokeniserError(f"Was expecting something matching {NAME.pattern}")

    if isinstance(state, ArrayStartState):
        if ch != "]":
            raise TokeniserError("Array indicators should be spelled []")
        return ArrayEndState(), ArrayToken()

    if isinstance(state, ArrayEndState):
        # We are expecting either EOF or a separator
        if ch is None:
            return FinishedState(), None
        if ch == ".":
            return NameState(first=False), None
        raise TokeniserError("Array end should only be followed by EOF or separator")

    # Any other state (including FinishedState) cannot consume input.
    raise TokeniserError("Tokeniser got into a bad state")
def tokenise(path: str) -> List[Token]:
    """Split a path string into its tokens.

    This is a module-level function, so it takes only ``path``; the former
    leading ``self`` parameter made the call ``tokenise(path)`` fail.

    Args:
        path: The path to tokenise.

    Returns:
        The tokens in the order they appear in ``path``.

    Raises:
        TokeniserError: If the path is malformed. ``context`` is set to
            ``(path, index)`` of the character that could not be processed.
    """
    state: State = StartState()
    tokens: List[Token] = []
    # The trailing None tells _process_char that the input has ended.
    for idx, ch in enumerate([*path, None]):
        try:
            state, token = _process_char(state, ch)
        except TokeniserError as exc:
            exc.context = (path, idx)
            raise
        if token is not None:
            tokens.append(token)
    # Raised rather than asserted so the check survives ``python -O``.
    if not isinstance(state, FinishedState):
        raise TokeniserError("Tokeniser got into a bad state")
    return tokens
class Pluck:
    """Describes how to pull one value out of a nested data structure.

    A ``Pluck`` is built from a path string, can be given a mapper
    (``map``) and/or a target class (``into``), and is then applied to data
    with ``pluck``. ``map`` and ``into`` return ``self`` so calls can be
    chained.
    """

    def __init__(self, path: str):
        self.path = tokenise(path)
        self.mapper = None
        # NOTE(review): never read in the code visible here; kept so existing
        # readers of the attribute do not break.
        self.info = None
        # Kept under private names: storing the target as ``self.into`` would
        # shadow the ``into`` method on the instance.
        self._into = None
        self._into_fields = {}

    def map(self, mapper: Union[Callable, dict]) -> "Pluck":
        """Convert each plucked value with ``mapper``.

        A ``dict`` mapper is used as a lookup table, anything else is called
        with the value.
        """
        self.mapper = mapper
        return self

    def into(self, into, **fields) -> "Pluck":
        """Build an ``into`` instance from each plucked value.

        Args:
            into: The class to instantiate.
            **fields: Attribute name -> ``Pluck`` used to fill that
                attribute; paths are relative to the plucked value.
        """
        self._into = into
        self._into_fields = fields
        return self

    def pluck(self, data: Any, expected_type: "Type[T]") -> "T":
        """Extract the value at this path from ``data``.

        Args:
            data: The structure to read from.
            expected_type: The type the result must have. Plain classes are
                checked with ``isinstance``; for parameterised generics such
                as ``List[int]`` only the container type is checked; ``Any``
                and unions are not checked.

        Returns:
            The extracted value after ``map``/``into`` have been applied.
            When the path contains ``[]`` the result is a list with one
            entry per element.

        Raises:
            TypeError: If the result does not have ``expected_type``.
            KeyError: If a path segment, or a key of a ``dict`` mapper, is
                missing (raised by the underlying lookup).
        """
        value = self._descend(data, self.path)
        self._check_type(value, expected_type)
        return value

    def _descend(self, node: Any, tokens: list) -> Any:
        """Follow ``tokens`` from ``node`` and convert what is found."""
        for pos, token in enumerate(tokens):
            if isinstance(token, ArrayToken):
                # Apply the rest of the path to every element.
                remaining = tokens[pos + 1:]
                return [self._descend(item, remaining) for item in node]
            node = node[token.name]
        # An empty token list (path ".") lands here with the data itself.
        return self._convert(node)

    def _convert(self, value: Any) -> Any:
        """Apply the configured mapper and target class to one value."""
        if self.mapper is not None:
            if isinstance(self.mapper, dict):
                value = self.mapper[value]
            else:
                value = self.mapper(value)
        if self._into is not None:
            value = parse_into(value, into=self._into, **self._into_fields)
        return value

    @staticmethod
    def _check_type(value: Any, expected_type: Any) -> None:
        """Raise ``TypeError`` when ``value`` is not of ``expected_type``."""
        if expected_type is Any:
            return
        origin = getattr(expected_type, "__origin__", None)
        if origin is Union:
            return
        # For typing generics, check against the runtime container class.
        target = expected_type if origin is None else origin
        if isinstance(target, type) and not isinstance(value, target):
            raise TypeError(
                f"Expected {expected_type!r}, got {type(value).__name__}: {value!r}"
            )
def parse_into(data: Any, *, into: "Type[T]", **kwargs) -> "T":
    """Build an ``into`` instance whose attributes are plucked from ``data``.

    Args:
        data: The structure handed to every plucker.
        into: The class to instantiate; its type hints supply the expected
            type of each attribute.
        **kwargs: Attribute name -> object with a
            ``pluck(data, expected_type)`` method that produces the value.

    Returns:
        ``into(**attrs)`` with one keyword argument per entry of ``kwargs``.

    Raises:
        TypeError: If a keyword names an attribute ``into`` does not annotate.
    """
    # Read the hints of the *target class*; the builtin ``type`` has none.
    hints = get_type_hints(into)
    unknown = [attr for attr in kwargs if attr not in hints]
    if unknown:
        target = getattr(into, "__name__", into)
        raise TypeError(
            f"{target} has no annotated attribute(s): {', '.join(unknown)}"
        )
    attrs = {
        attr: plucker.pluck(data, hints[attr])
        for attr, plucker in kwargs.items()
    }
    return into(**attrs)
class MemberStatus(Enum):
    """Membership states referred to by a status-change payload."""

    CURRENT = auto()
    EXPIRED = auto()


# Lookup from a status string to its MemberStatus. The full member names are
# what the example payload carries ("CURRENT", "EXPIRED"); the abbreviations
# are kept so existing lookups keep working.
TO_STATUS = {
    "CURRENT": MemberStatus.CURRENT,
    "EXPIRED": MemberStatus.EXPIRED,
    "CUR": MemberStatus.CURRENT,
    "EXP": MemberStatus.EXPIRED,
}
@dataclass
class Person:
    """A person record made of a display name and a numeric id."""

    name: str
    id: int
@dataclass
class StatusChange:
    """A membership status-change event in its typed form."""

    date: date
    # Annotated with the enum class itself: ``Enum[MemberStatus]`` is a member
    # lookup and raises KeyError while the class body is being evaluated.
    state_from: MemberStatus
    state_to: MemberStatus
    ids: List[int]
    num: int
    # Defaulted so that code constructing StatusChange with the first five
    # fields keeps working.
    people: List[Person] = field(default_factory=list)
def extract(json: dict) -> StatusChange:
    """Turn a decoded status-change payload into a ``StatusChange``.

    Args:
        json: The decoded payload (a ``dict``).

    Returns:
        The ``StatusChange`` assembled by ``parse_into`` from the pluckers
        listed below.
    """
    # Delegate to parse_into; calling ``extract`` itself here would fail
    # because it accepts no keyword arguments.
    return parse_into(
        json,
        into=StatusChange,
        # The payload carries an ISO date string; the field is a ``date``.
        date=Pluck(".date").map(date.fromisoformat),
        num=Pluck(".number_as_str").map(int),
        state_from=Pluck(".payload.from").map(TO_STATUS),
        state_to=Pluck(".payload.to").map(TO_STATUS),
        ids=Pluck(".payload.who[].id"),
        people=Pluck(".payload.who[]").into(
            Person,
            name=Pluck(".name"),
            id=Pluck(".id"),
        ),
    )