Skip to content

Instantly share code, notes, and snippets.

@takkaria
Last active December 31, 2020 21:17
Show Gist options
  • Save takkaria/d8616b1720c05dd763b845e15766d4f1 to your computer and use it in GitHub Desktop.
Save takkaria/d8616b1720c05dd763b845e15766d4f1 to your computer and use it in GitHub Desktop.

Existing ways to do this:

  1. Are not type safe (DRF)
  2. Require the same structure between the JSON and the serialization (dataclasses_json)

payload:

{
    "date": "2020-12-12",
    "type": "membership:statuschange",
    "number_as_str": "2",
    "payload": {
        "from": "CURRENT",
        "to": "EXPIRED",
        "who": [
            { "name": "Fred Whelks", "id": 102 },
            { "name": "Fisher Carrie", "id": 1334 }
        ]
    },
    "from": "Hub <bef34acb948f>"
}

python:

import re
from dataclasses import dataclass, field
from datetime import date
from enum import Enum, auto
from typing import Any, Callable, List, Optional, Tuple, TypeVar, Union

T = TypeVar('T')


@dataclass
class PathToken:
    """A key segment of a path, e.g. `payload` in ".payload.who"."""
    name: str


@dataclass
class ArrayToken:
    """An array fan-out segment, spelled `[]` in a path."""


Token = Union[PathToken, ArrayToken]


class StartState:
    """Before any input: expecting the leading dot."""


class SeparatorState:
    """Reserved separator state.

    NOTE(review): currently unused by _process_char; kept because the
    State union names it.
    """


@dataclass
class NameState:
    """Accumulating the characters of one key name.

    BUG FIX: this was a plain class with bare annotations, so calls like
    NameState(first=True) raised TypeError; it is now a dataclass.
    """
    first: bool         # True while reading the first segment after the dot
    captured: str = ""  # characters of the current name seen so far


class ArrayStartState:
    """Just consumed "["; expecting the closing "]"."""


class ArrayEndState:
    """Just consumed "]"; expecting "." or end of input."""


class FinishedState:
    """End of input reached successfully."""


State = Union[
    StartState,
    SeparatorState,
    NameState,
    ArrayStartState,
    ArrayEndState,
    FinishedState,
]

# One character allowed inside a key name.
NAME = re.compile(r"[a-zA-Z_-]")


class TokeniserError(ValueError):
    """Raised on malformed paths; tokenise() attaches (path, index) context."""

    message: str
    context: Optional[Tuple[str, int]] = None

    def __init__(self, message):
        self.message = message

    def __str__(self):
        if self.context is None:
            return self.message
        else:
            # Render a caret under the offending character.
            path, idx = self.context
            return (
                f"{self.message} at index {idx}:\n" +
                f"{path}\n" +
                f"{' ' * idx}^"
            )


def _process_char(state: State, ch: Union[str, None]) -> Tuple[State, Optional[Token]]:
    """Advance the tokeniser by one character; ch is None at end of input.

    Returns (next state, completed token or None).

    BUG FIXES vs the original sketch:
      * every branch now returns a (state, token) pair, matching the
        caller's unpacking (some branches returned a bare state or fell
        through to the "bad state" raise);
      * a name still being read at end of input is emitted rather than
        dropped, and "trailing dot" is only an error when nothing was
        captured since the last separator;
      * the ArrayEndState "." branch returns its new state instead of
        falling through;
      * typo in an error message fixed ("patching" -> "matching").
    """
    if type(state) is StartState:
        if ch != ".":
            raise TokeniserError("All paths should start with the dot")
        return NameState(first=True), None

    elif type(state) is NameState:
        if ch is None:
            if state.captured:
                # Emit the final name segment.
                return FinishedState(), PathToken(state.captured)
            elif state.first:
                # We accept paths like "."
                return FinishedState(), None
            else:
                raise TokeniserError("Trailing dot at the end of a path")
        elif NAME.match(ch):
            state.captured += ch
            return state, None
        elif ch == "[":
            return ArrayStartState(), PathToken(state.captured)
        elif ch == ".":
            return NameState(first=False), PathToken(state.captured)
        else:
            raise TokeniserError(f"Was expecting something matching {NAME}")

    elif type(state) is ArrayStartState:
        if ch != "]":
            raise TokeniserError("Array indicators should be spelled []")
        return ArrayEndState(), ArrayToken()

    elif type(state) is ArrayEndState:
        # We are expecting either EOF or a separator
        if ch is None:
            return FinishedState(), None
        elif ch == ".":
            return NameState(first=False), None
        raise TokeniserError("Array end should only be followed by EOF or separator")

    raise TokeniserError("Tokeniser got into a bad state")


def tokenise(path: str) -> List[Token]:
    """Tokenise a path such as ".payload.who[].id" into a list of Tokens.

    BUG FIX: the original declared a spurious `self` parameter even though
    this is a module-level function called as tokenise(path).

    Raises TokeniserError (with (path, index) context attached) on
    malformed input.
    """
    state: State = StartState()
    tokens: List[Token] = []

    # Trailing None sentinel signals end-of-input to _process_char.
    split_path = list(path) + [None]

    for idx, ch in enumerate(split_path):
        try:
            state, token = _process_char(state, ch)
        except TokeniserError as exc:
            exc.context = path, idx
            raise
        if token is not None:
            tokens.append(token)

    if type(state) is not FinishedState:
        # Defensive: _process_char should always finish or raise.
        # (Was an assert, which disappears under `python -O`.)
        raise TokeniserError("Tokeniser got into a bad state")
    return tokens


class Pluck:
    """Fluent description of where one value lives in a JSON-ish structure.

    Example: Pluck(".payload.who[].id") plucks the `id` of every element
    of the `who` array under `payload`.  `.map(...)` converts the value;
    `.into(...)` parses it (or each array element) into a target type.
    """

    def __init__(self, path: str):
        self.path = tokenise(path)
        self.mapper: Optional[Union[Callable, dict]] = None
        self.info = None
        # (target type, per-field sub-pluckers), set by into().
        self._into: Optional[Tuple[Any, dict]] = None

    def map(self, mapper: Union[Callable, dict]) -> "Pluck":
        """Register a converter: a callable is applied to the plucked
        value, a dict is used as a lookup table."""
        self.mapper = mapper
        return self

    def into(self, into: Any, **fields: "Pluck") -> "Pluck":
        """Parse the plucked value (or each array element) into `into`,
        using `fields` as per-attribute sub-pluckers.

        BUG FIX: the original assigned the target to `self.into`, which
        clobbered this method, and its signature rejected the keyword
        sub-pluckers that call sites pass.
        """
        self._into = (into, fields)
        return self

    def pluck(self, data: Any, expected_type: "T") -> "T":
        """Extract this Pluck's value from `data` and convert it.

        BUG FIX: the original left `source` unbound for non-empty paths
        and always returned None; traversal is now implemented.
        `expected_type` is kept for interface compatibility.
        """
        # Special case the empty path (e.g. ".") — the whole document.
        source = data if self.path == [] else self._walk(self.path, data)
        if any(isinstance(tok, ArrayToken) for tok in self.path):
            # Array paths yield a list; convert element-wise.
            return [self._convert(item) for item in source]
        return self._convert(source)

    def _walk(self, tokens: "List[Token]", source: Any) -> Any:
        """Follow `tokens` into `source`; an ArrayToken fans out over a
        list, applying the remaining tokens to every element."""
        for idx, token in enumerate(tokens):
            if isinstance(token, ArrayToken):
                rest = tokens[idx + 1:]
                return [self._walk(rest, item) for item in source]
            source = source[token.name]
        return source

    def _convert(self, value: Any) -> Any:
        """Apply the registered mapper or nested target to one value."""
        if self.mapper is not None:
            if isinstance(self.mapper, dict):
                return self.mapper[value]
            return self.mapper(value)
        if self._into is not None:
            target, fields = self._into
            return parse_into(value, into=target, **fields)
        return value


def parse_into(data: Any, *, into: "T", **kwargs) -> "T":
    """Build an instance of `into` by plucking one value per keyword.

    Each keyword names a field of `into`; its value is a plucker whose
    `pluck(data, field_type)` supplies that field.  The field's declared
    type comes from `into.__annotations__`.

    BUG FIX: the original read annotations from the *builtin* `type` and
    then called `type(**attrs)`; both must use the `into` target.
    """
    attrs = {}
    for attr, plucker in kwargs.items():
        type_of_attr = into.__annotations__[attr]
        attrs[attr] = plucker.pluck(data, type_of_attr)

    return into(**attrs)


class MemberStatus(Enum):
    """Membership lifecycle states carried in webhook payloads."""
    CURRENT = auto()
    EXPIRED = auto()


# Maps wire-format status strings to MemberStatus members.
# BUG FIX: the keys previously read "CUR"/"EXP", which never match the
# payload values ("CURRENT"/"EXPIRED") shown in the example payload.
TO_STATUS = {
    "CURRENT": MemberStatus.CURRENT,
    "EXPIRED": MemberStatus.EXPIRED,
}


@dataclass
class Person:
    """One member referenced in the payload's `who` array."""
    name: str
    id: int


@dataclass
class StatusChange:
    """Typed result of extracting a membership:statuschange payload.

    BUG FIX: the state fields were annotated `Enum[MemberStatus]`, which
    raises at class-creation time (EnumMeta's `[]` looks up a member by
    name); the annotation is simply the enum class.
    """
    date: date
    state_from: MemberStatus
    state_to: MemberStatus
    ids: List[int]
    num: int
    # Added so extract()'s `people=` keyword has a matching field;
    # defaulted to keep existing StatusChange(...) call sites working.
    people: List[Person] = field(default_factory=list)


def extract(json: dict) -> StatusChange:
    """Parse a membership:statuschange webhook payload into a StatusChange.

    BUG FIX: the original called *itself* recursively (infinite
    recursion); it was meant to delegate to parse_into.
    """
    return parse_into(
        json,
        into=StatusChange,
        # The payload carries an ISO date string ("2020-12-12"); convert
        # it to a datetime.date to match the field's annotation.
        date=Pluck(".date").map(date.fromisoformat),
        num=Pluck(".number_as_str").map(int),
        state_from=Pluck(".payload.from").map(TO_STATUS),
        state_to=Pluck(".payload.to").map(TO_STATUS),
        ids=Pluck(".payload.who[].id"),
        people=Pluck(".payload.who[]").into(
            Person,
            name=Pluck(".name"),
            id=Pluck(".id"),
        ),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment