Skip to content

Instantly share code, notes, and snippets.

@nrbnlulu
Created June 18, 2024 08:43
Show Gist options
  • Save nrbnlulu/e983ab23bed5806cff5bb8ba97434d6d to your computer and use it in GitHub Desktop.
Save nrbnlulu/e983ab23bed5806cff5bb8ba97434d6d to your computer and use it in GitHub Desktop.
Msgspec vs Pydantic v2
from datetime import datetime
import json
import re
import timeit
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Annotated, Any, Callable, Iterator, TypedDict
from pydantic.annotated_handlers import GetJsonSchemaHandler
from pydantic.json_schema import JsonSchemaValue
from pydantic_core import core_schema
import mimesis
import msgspec
import pydantic
from pydantic.type_adapter import TypeAdapter
# Shared mimesis data generator; create_user_msgspec() draws every field from it.
provider = mimesis.Generic()
class Email(str):
    """String subtype used to tag a value as an e-mail address.

    Instances compare and behave like plain ``str``; the separate type lets
    other code tell e-mail values apart with ``isinstance``.
    """

    # Empty ``__slots__``: instances carry no per-object ``__dict__``.
    __slots__ = ()
# Compiled once at import time. No ``^``/``$`` anchors: ``fullmatch`` below
# requires the whole string to match.
_EMAIL_PATTERN = re.compile(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+")


def validate_email(email: str) -> Email:
    """Check *email* against the address pattern and wrap it as ``Email``.

    Args:
        email: Candidate address.

    Returns:
        The same text as an ``Email`` instance.

    Raises:
        ValueError: If *email* does not match the pattern in full. Unlike
            ``re.match`` with a trailing ``$``, a string that ends with a
            newline is rejected.
    """
    if _EMAIL_PATTERN.fullmatch(email) is None:
        raise ValueError("Invalid email")
    return Email(email)
def enc_hook(obj: Any) -> Any:  # noqa: ANN401
    """Encode hook passed to the msgspec encoder for non-native types.

    Args:
        obj: Object the encoder handed to the hook.

    Returns:
        A plain ``str`` copy of *obj* when it is an ``Email``.

    Raises:
        NotImplementedError: For every other type. Falling through and
            returning ``None`` would hand the encoder a valid value
            (``None``) instead of signalling an unsupported object.
    """
    if isinstance(obj, Email):
        return str(obj)
    raise NotImplementedError(f"Objects of type {type(obj)} are not supported")
def dec_hook(type_: type, val: object) -> Any:  # noqa: ANN401
    """Decode hook passed to the msgspec decoder for custom types.

    Args:
        type_: The type the decoder expects at this position.
        val: The already-decoded value to convert.

    Returns:
        A validated ``Email`` when *type_* is ``Email`` and *val* is a string.

    Raises:
        ValueError: Propagated from ``validate_email`` for a malformed address.
        NotImplementedError: For any other combination of type and value,
            rather than implicitly returning ``None``.
    """
    if type_ is Email and isinstance(val, str):
        return validate_email(val)
    raise NotImplementedError(
        f"Cannot decode {type(val).__name__} as {type_!r}"
    )
class MsgSpecUser(msgspec.Struct):
    """User record declared as a ``msgspec.Struct``.

    Declares the same field names, in the same order, as ``PydanticUser``.
    None of the fields has a default, so all must be supplied.
    """

    id: str
    username: str
    password: str
    # Custom ``str`` subtype; see ``enc_hook`` / ``dec_hook`` for its conversion.
    email: Email
    blog: str
    first_name: str
    last_name: str
    is_active: bool
    is_staff: bool
    is_superuser: bool
    date_joined: datetime
    last_login: datetime
    # Optional nested user (self-referential, hence the string annotation).
    friend: "MsgSpecUser | None"
def create_user_msgspec() -> MsgSpecUser:
    """Build one ``MsgSpecUser`` populated from the module-level ``provider``.

    The ``friend`` field is decided last: when
    ``provider.development.boolean()`` is true, another user is created
    recursively; otherwise it is ``None``.
    """
    # Dict literals evaluate in order, so the provider is called in the same
    # sequence as the struct's field declaration.
    fields = {
        "id": provider.person.identifier(),
        "username": provider.person.username(),
        "password": provider.person.password(),
        "email": validate_email(provider.person.email()),
        "blog": provider.internet.url(),
        "first_name": provider.person.name(),
        "last_name": provider.person.last_name(),
        "is_active": provider.development.boolean(),
        "is_staff": provider.development.boolean(),
        "is_superuser": provider.development.boolean(),
        "date_joined": provider.datetime.datetime(),
        "last_login": provider.datetime.datetime(),
    }
    if provider.development.boolean():
        fields["friend"] = create_user_msgspec()
    else:
        fields["friend"] = None
    return MsgSpecUser(**fields)
# Benchmark fixture: generate the users once, then serialize them with msgspec.
# The resulting bytes are the shared input for both decoders below.
_data = [create_user_msgspec() for _ in range(10_000)]
msgspec_encoder = msgspec.json.Encoder(enc_hook=enc_hook)
data_raw = msgspec_encoder.encode(_data)
class _PydanticEmailAnnot:
    """Annotation class that supplies pydantic's schemas for ``Email``.

    Intended to be used through ``Annotated[Email, _PydanticEmailAnnot]``.
    """

    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        _source_type: Any,
        _handler: Callable[[Any], core_schema.CoreSchema],
    ) -> core_schema.CoreSchema:
        """Return the core schema used to validate and serialize ``Email``.

        The schema behaves as follows:
        * JSON input must be a string, which is then passed through
          ``validate_email``.
        * Python input may be an existing ``Email`` instance (accepted by an
          instance check) or a string validated as above.
        * Serialization returns the instance unchanged.
        """
        from_str_schema = core_schema.chain_schema(
            [
                core_schema.str_schema(),
                core_schema.no_info_plain_validator_function(validate_email),
            ]
        )
        return core_schema.json_or_python_schema(
            json_schema=from_str_schema,
            python_schema=core_schema.union_schema(
                [
                    # check if it's an instance first before doing any further work
                    core_schema.is_instance_schema(Email),
                    from_str_schema,
                ]
            ),
            serialization=core_schema.plain_serializer_function_ser_schema(
                lambda instance: instance
            ),
        )

    @classmethod
    def __get_pydantic_json_schema__(
        cls, _core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
    ) -> JsonSchemaValue:
        """Return the JSON schema advertised for ``Email`` fields."""
        # Validation starts from ``str_schema()``, so advertise a string
        # rather than an integer.
        return handler(core_schema.str_schema())
# ``Annotated`` alias pairing ``Email`` with its pydantic schema hooks; use it as
# the annotation for fields on ``BaseModel``s, etc.
PydanticEmail = Annotated[Email, _PydanticEmailAnnot]
class PydanticUser(pydantic.BaseModel):
    """User record declared as a pydantic ``BaseModel``.

    Declares the same field names, in the same order, as ``MsgSpecUser``.
    None of the fields has a default, so all must be supplied.
    """

    id: str
    username: str
    password: str
    # ``Email`` annotated with the custom pydantic schema hooks.
    email: PydanticEmail
    blog: str
    first_name: str
    last_name: str
    is_active: bool
    is_staff: bool
    is_superuser: bool
    date_joined: datetime
    last_login: datetime
    # Optional nested user (self-referential, hence the string annotation).
    friend: "PydanticUser | None"
@dataclass
class TimeitResult:
task: str
seconds: float | None = None
@contextmanager
def time_it(task: str) -> Iterator[TimeitResult]:
    """Time the body of a ``with`` block and print the duration.

    Args:
        task: Label used in the printed line and stored on the result.

    Yields:
        A ``TimeitResult`` whose ``seconds`` is ``None`` while the block runs
        and is set to the elapsed time when the block exits.
    """
    start = timeit.default_timer()
    res = TimeitResult(task=task)
    try:
        yield res
    finally:
        # Runs even if the timed body raises, so the measurement is still
        # recorded and reported before the exception propagates.
        elapsed = timeit.default_timer() - start
        # ``:1f`` is a minimum width of 1 with the default six decimals.
        print(f"{task} took {elapsed:1f} seconds")
        res.seconds = elapsed
def match_precentage(pydantic: float, msgspec: float) -> str:
    """Describe which library was faster and by what percentage.

    Args:
        pydantic: Elapsed time measured for pydantic.
        msgspec: Elapsed time measured for msgspec.

    Returns:
        A sentence naming the faster library and the relative difference,
        expressed as a percentage of the faster time. When the two times are
        not strictly ordered in pydantic's favour, msgspec is reported.
    """
    pydantic_wins = pydantic < msgspec
    if not pydantic_wins:
        speedup = ((pydantic - msgspec) / msgspec) * 100
        return f"MsgSpec is faster by %{speedup:1f}"
    speedup = ((msgspec - pydantic) / pydantic) * 100
    return f"Pydantic is faster by %{speedup:1f}"
# Decode benchmark: both libraries parse the same ``data_raw`` bytes.
# The decoder is built outside the timed block; ``dec_hook`` handles ``Email``.
msgspec_decoder = msgspec.json.Decoder(list[MsgSpecUser], dec_hook=dec_hook)
with time_it("msgspec_decode") as msgspec_res:
    msgspec_data = msgspec_decoder.decode(data_raw)
# The adapter is likewise built before timing, so only ``validate_json`` is measured.
users_ta = TypeAdapter(list[PydanticUser])
with time_it("pydantic_decode") as pydantic_res:
    pydantic_data = users_ta.validate_json(data_raw)
print(f"DECODE: {match_precentage(pydantic_res.seconds, msgspec_res.seconds)}")
# ------------ encode ------------
# Each library re-encodes the objects it produced in the decode step.
# ``msgspec_res`` / ``pydantic_res`` are rebound to the encode timings here.
with time_it("msgspec_encode") as msgspec_res:
    msgspec_data_raw = msgspec_encoder.encode(msgspec_data)
with time_it("pydantic_encode") as pydantic_res:
    pydantic_data_raw = users_ta.dump_json(pydantic_data)
print(f"ENCODE: {match_precentage(pydantic_res.seconds, msgspec_res.seconds)}")
@nrbnlulu
Copy link
Author

results

msgspec_decode took 0.050580 seconds
pydantic_decode took 0.150948 seconds
DECODE: MsgSpec is faster by %198.433165
msgspec_encode took 0.015060 seconds
pydantic_encode took 0.060530 seconds
ENCODE: MsgSpec is faster by %301.920586

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment