# Created June 18, 2024 08:43
# Msgspec vs Pydantic v2
from datetime import datetime
import json
import re
import timeit
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Annotated, Any, Callable, Iterator, TypedDict
from pydantic.annotated_handlers import GetJsonSchemaHandler
from pydantic.json_schema import JsonSchemaValue
from pydantic_core import core_schema
import mimesis
import msgspec
import pydantic
from pydantic.type_adapter import TypeAdapter
# Shared mimesis generator; create_user_msgspec() draws from it when building benchmark users.
provider = mimesis.Generic()
class Email(str):
    """String subtype used to tag email-address values.

    Constructing an ``Email`` directly performs no validation; it only marks
    the value's type so the encode/decode hooks can recognise it.
    """

    # Empty slots: instances carry no per-instance ``__dict__`` beyond the str data.
    __slots__ = ()
def validate_email(email: str) -> Email:
    """Return *email* wrapped as an ``Email`` if it looks like an address.

    Args:
        email: Candidate address to check.

    Returns:
        The same text as an ``Email`` instance.

    Raises:
        ValueError: If *email* does not match the address pattern.
    """
    # fullmatch (rather than match + "$") so that a trailing newline is
    # rejected: "$" alone also matches just before a final "\n".
    if re.fullmatch(r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$", email) is None:
        raise ValueError("Invalid email")
    return Email(email)
def enc_hook(obj: Any) -> Any:  # noqa: ANN401
    """msgspec encode hook: convert custom objects to natively encodable ones.

    Args:
        obj: An object the encoder could not serialise on its own.

    Returns:
        A plain ``str`` for ``Email`` instances.

    Raises:
        NotImplementedError: For any other type, so the encoder reports the
            unsupported object instead of silently writing ``null``.
    """
    if isinstance(obj, Email):
        return str(obj)
    raise NotImplementedError(
        f"Objects of type {type(obj).__name__} are not supported"
    )
def dec_hook(type_: type, val: object) -> Any:  # noqa: ANN401
    """msgspec decode hook: build custom types from decoded basic values.

    Args:
        type_: The target type the decoder needs an instance of.
        val: The already-decoded basic value for that field.

    Returns:
        A validated ``Email`` when ``type_`` is ``Email``.

    Raises:
        TypeError: If an ``Email`` is requested but *val* is not a string.
        ValueError: If the string is not a valid address (from ``validate_email``).
        NotImplementedError: For any other target type, instead of silently
            returning ``None`` as the decoded value.
    """
    if type_ is Email:
        if not isinstance(val, str):
            raise TypeError(f"Expected `str`, got `{type(val).__name__}`")
        return validate_email(val)
    raise NotImplementedError(f"Objects of type {type_} are not supported")
# msgspec model of a user record; field-for-field counterpart of PydanticUser.
# Documented with comments rather than a docstring so generated schema
# metadata is left untouched.
class MsgSpecUser(msgspec.Struct):
    id: str
    username: str
    password: str
    email: Email  # custom type; handled by enc_hook / dec_hook
    blog: str
    first_name: str
    last_name: str
    is_active: bool
    is_staff: bool
    is_superuser: bool
    date_joined: datetime
    last_login: datetime
    # Self-referential, required field: a nested user or None.
    friend: "MsgSpecUser | None"
# Build one MsgSpecUser for the benchmark data set. `friend` is either another
# freshly built user (recursive call) or None, chosen by
# `provider.development.boolean()`, so nesting depth varies per record.
# NOTE(review): only the `friend=` argument is visible and the call is never
# closed -- the remaining MsgSpecUser fields appear to be missing from this copy;
# restore them from the original before running.
def create_user_msgspec() -> MsgSpecUser:
return MsgSpecUser(
friend=create_user_msgspec() if provider.development.boolean() else None
# Benchmark fixture: 10000 generated users.
_data = [create_user_msgspec() for _ in range(10000)]
# Encoder is built once and reused for the timed encode run; enc_hook handles Email values.
msgspec_encoder = msgspec.json.Encoder(enc_hook=enc_hook)
# JSON payload that both the msgspec and the pydantic decode runs consume.
data_raw = msgspec_encoder.encode(_data)
# Annotation marker paired with `Email` inside `Annotated[...]`; it exposes the
# pydantic core-schema and JSON-schema hook methods for that type.
# NOTE(review): this copy of the class looks incomplete -- no decorators, no `cls`
# parameter on the first hook, unquoted docstring text and unclosed calls are
# visible. Compare against the original before relying on it.
class _PydanticEmailAnnot:
def __get_pydantic_core_schema__(
_source_type: Any,
_handler: Callable[[Any], core_schema.CoreSchema],
) -> core_schema.CoreSchema:
# NOTE(review): the next five lines read like docstring text whose quotes were
# lost. They describe ints and `ThirdPartyType`, neither of which appears elsewhere
# in this file -- presumably left over from a template; verify and reword for Email.
We return a pydantic_core.CoreSchema that behaves in the following ways:
* ints will be parsed as `ThirdPartyType` instances with the int as the x attribute
* `ThirdPartyType` instances will be parsed as `ThirdPartyType` instances without any changes
* Nothing else will pass validation
* Serialization will always return just an int
# NOTE(review): the arguments to chain_schema(...) are not visible here.
from_str_schema = core_schema.chain_schema(
# NOTE(review): most arguments to json_or_python_schema(...) are not visible here.
return core_schema.json_or_python_schema(
# check if it's an instance first before doing any further work
lambda instance: instance
def __get_pydantic_json_schema__(
cls, _core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
) -> JsonSchemaValue:
# NOTE(review): Email subclasses str, so advertising the `int` JSON schema looks
# wrong -- presumably this should be the string schema; confirm.
# Use the same schema that would be used for `int`
return handler(core_schema.int_schema())
# `Annotated` alias used as the field annotation on pydantic models: the value
# type is `Email`, and `_PydanticEmailAnnot` supplies the pydantic schema hooks.
PydanticEmail = Annotated[Email, _PydanticEmailAnnot]
# pydantic model of a user record; field-for-field counterpart of MsgSpecUser.
# Documented with comments rather than a docstring so the generated JSON
# schema (which picks up class docstrings) is left untouched.
class PydanticUser(pydantic.BaseModel):
    id: str
    username: str
    password: str
    email: PydanticEmail  # Email plus the pydantic schema hooks
    blog: str
    first_name: str
    last_name: str
    is_active: bool
    is_staff: bool
    is_superuser: bool
    date_joined: datetime
    last_login: datetime
    # Self-referential, required field: a nested user or None.
    friend: "PydanticUser | None"
class TimeitResult:
task: str
seconds: float | None = None
@contextmanager
def time_it(task: str) -> Iterator[TimeitResult]:
    """Time the body of a ``with`` block and report the duration.

    Args:
        task: Label used in the printed report and stored on the result.

    Yields:
        A ``TimeitResult`` whose ``seconds`` is filled in once the block exits
        normally. If the block raises, the code after ``yield`` does not run,
        so nothing is printed and ``seconds`` stays ``None``.
    """
    start = timeit.default_timer()
    res = TimeitResult(task=task)
    yield res
    # The end timestamp is taken before printing so output cost is not measured.
    end = timeit.default_timer()
    print(f"{task} took {end - start:1f} seconds")
    res.seconds = end - start
def match_precentage(pydantic: float, msgspec: float) -> str:
    """Describe which library was faster and by what percentage.

    The percentage is the time difference relative to the faster (smaller)
    timing. On a tie the MsgSpec branch is taken and reports 0.

    Args:
        pydantic: Elapsed seconds for the pydantic run.
        msgspec: Elapsed seconds for the msgspec run.

    Returns:
        A sentence naming the faster library and its lead in percent.

    Raises:
        ZeroDivisionError: If the faster timing is exactly zero.
    """
    # NOTE: the parameter names shadow the imported modules inside this function
    # only; they are kept because callers may pass them by keyword.
    if pydantic < msgspec:
        return f"Pydantic is faster by %{((msgspec - pydantic) / pydantic) * 100:1f}"
    return f"MsgSpec is faster by %{((pydantic - msgspec) / msgspec) * 100:1f}"
# ------------ decode ------------
# The typed decoder is built once, outside the timed region; dec_hook turns the
# decoded strings into validated Email values.
msgspec_decoder = msgspec.json.Decoder(list[MsgSpecUser], dec_hook=dec_hook)
with time_it("msgspec_decode") as msgspec_res:
    msgspec_data = msgspec_decoder.decode(data_raw)

# TypeAdapter construction is likewise kept out of the timed region.
users_ta = TypeAdapter(list[PydanticUser])
with time_it("pydantic_decode") as pydantic_res:
    pydantic_data = users_ta.validate_json(data_raw)

print(f"DECODE: {match_precentage(pydantic_res.seconds, msgspec_res.seconds)}")

# ------------ encode ------------
# Each library re-encodes the objects it decoded above; the result names are reused.
with time_it("msgspec_encode") as msgspec_res:
    msgspec_data_raw = msgspec_encoder.encode(msgspec_data)

with time_it("pydantic_encode") as pydantic_res:
    pydantic_data_raw = users_ta.dump_json(pydantic_data)

print(f"ENCODE: {match_precentage(pydantic_res.seconds, msgspec_res.seconds)}")
# Sample output:
# msgspec_decode took 0.050580 seconds
# pydantic_decode took 0.150948 seconds
# DECODE: MsgSpec is faster by %198.433165
# msgspec_encode took 0.015060 seconds
# pydantic_encode took 0.060530 seconds
# ENCODE: MsgSpec is faster by %301.920586

