Skip to content

Instantly share code, notes, and snippets.

@jcrist
Last active January 11, 2024 14:34
Show Gist options
  • Star 13 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcrist/d62f450594164d284fbea957fd48b743 to your computer and use it in GitHub Desktop.
Save jcrist/d62f450594164d284fbea957fd48b743 to your computer and use it in GitHub Desktop.
A quick benchmark comparing msgspec (https://github.com/jcrist/msgspec), pydantic v1, and pydantic v2
"""A quick benchmark comparing the performance of:
- msgspec: https://github.com/jcrist/msgspec
- pydantic V1: https://docs.pydantic.dev/1.10/
- pydantic V2: https://docs.pydantic.dev/dev-v2/
The benchmark is modified from the one in the msgspec repo here:
https://github.com/jcrist/msgspec/blob/main/benchmarks/bench_validation.py
I make no claims that it's illustrative of all use cases. I wrote this up
mostly to get an understanding of how msgspec's performance compares with that
of pydantic V2.
"""
from __future__ import annotations
import datetime
import random
import string
import timeit
import uuid
from typing import List, Literal, Union, Annotated
import msgspec
import pydantic
import pydantic.v1
def make_filesystem_data(capacity):
    """Generate a tree structure representing a fake filesystem.

    The result is a nested ``dict`` describing a root directory. Every node
    has the keys ``type`` (``"directory"`` or ``"file"``), ``name``,
    ``created_by`` (a UUID string), ``created_at`` and ``updated_at``
    (ISO 8601 strings in UTC). Directories additionally carry ``contents``
    (a list of child nodes); files carry ``nbytes``.

    Args:
        capacity: Total number of nodes to generate, including the root.
            Values below 1 yield just an empty root directory.

    Returns:
        The root directory node. All randomness comes from generators seeded
        with a fixed value, so the output is identical for equal ``capacity``.
    """
    utc = datetime.timezone.utc
    date_2018 = datetime.datetime(2018, 1, 1, tzinfo=utc)
    date_2023 = datetime.datetime(2023, 1, 1, tzinfo=utc)

    rand = random.Random(42)
    # UUIDs are drawn from their own seeded generator: the data becomes fully
    # reproducible without perturbing the stream that shapes the tree.
    uuid_rand = random.Random(42)
    uuids = [
        str(uuid.UUID(int=uuid_rand.getrandbits(128), version=4))
        for _ in range(30)
    ]

    def randdt(lo, hi):
        # randint() needs real integers; float timestamps raise TypeError on
        # Python 3.12+ (and were deprecated before that).
        ts = rand.randint(int(lo.timestamp()), int(hi.timestamp()))
        # Convert straight to an aware UTC datetime. Going through naive
        # local time and relabelling it as UTC shifts the value by the
        # host's UTC offset.
        return datetime.datetime.fromtimestamp(ts, tz=utc)

    def randstr(min_len, max_len=None):
        # With only ``min_len`` the string has exactly that length.
        length = min_len if max_len is None else rand.randint(min_len, max_len)
        return "".join(rand.choices(string.ascii_letters, k=length))

    def make_node(is_dir):
        nonlocal capacity
        name = randstr(4, 30)
        created_by = rand.choice(uuids)
        created_at = randdt(date_2018, date_2023)
        updated_at = randdt(created_at, date_2023)
        node = {
            "type": "directory" if is_dir else "file",
            "name": name,
            "created_by": created_by,
            "created_at": created_at.isoformat(),
            "updated_at": updated_at.isoformat(),
        }
        if is_dir:
            # Reserve the children from the shared budget *before* building
            # them, so nested directories cannot overdraw it.
            n = min(rand.randint(0, 30), max(capacity, 0))
            capacity -= n
            node["contents"] = [
                make_node(rand.random() > 0.9) for _ in range(n)
            ]
        else:
            node["nbytes"] = rand.randint(0, 1000000)
        return node

    capacity -= 1  # the root itself counts towards the budget
    out = make_node(True)
    # Spend whatever budget is left on extra children of the root.
    while capacity > 0:
        capacity -= 1
        out["contents"].append(make_node(rand.random() > 0.9))
    return out
def bench(raw_data, dumps, loads, convert):
    """Time one serializer/deserializer pair on the same message.

    Args:
        raw_data: Plain input data handed to ``convert``.
        dumps: Callable that serializes the converted message.
        loads: Callable that parses the output of ``dumps``.
        convert: Callable that turns ``raw_data`` into the message object.

    Returns:
        A ``(dumps_time, loads_time)`` tuple: average seconds per call as
        measured by ``timeit.Timer.autorange``.
    """

    def seconds_per_call(func, arg):
        # autorange() picks the loop count itself and reports the total time.
        timer = timeit.Timer(
            "func(data)", setup="", globals={"func": func, "data": arg}
        )
        loops, elapsed = timer.autorange()
        return elapsed / loops

    message = convert(raw_data)
    encoded = dumps(message)

    # Sanity check: a round trip must reproduce the message before timing it.
    assert message == loads(encoded)

    # Serialization is timed first, then parsing.
    dumps_time = seconds_per_call(dumps, message)
    loads_time = seconds_per_call(loads, encoded)
    return dumps_time, loads_time
#############################################################################
# msgspec #
#############################################################################
class File(msgspec.Struct, tag="file"):
    """msgspec struct for a file node, declared with the tag ``"file"``."""

    # Non-empty name (min_length=1).
    name: Annotated[str, msgspec.Meta(min_length=1)]
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Size in bytes, constrained to be >= 0.
    nbytes: Annotated[int, msgspec.Meta(ge=0)]
class Directory(msgspec.Struct, tag="directory"):
    """msgspec struct for a directory node, declared with the tag ``"directory"``."""

    # Non-empty name (min_length=1).
    name: Annotated[str, msgspec.Meta(min_length=1)]
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Children: any mix of files and (recursively) directories.
    contents: List[Union[File, Directory]]
def bench_msgspec(data):
    """Benchmark msgspec JSON encoding/decoding of ``data`` as a ``Directory``.

    Returns the ``(dumps_time, loads_time)`` tuple produced by ``bench``.
    """
    # Encoder and decoder are built once and reused for every timed call.
    encoder = msgspec.json.Encoder()
    decoder = msgspec.json.Decoder(Directory)
    return bench(
        data,
        encoder.encode,
        decoder.decode,
        lambda raw: msgspec.convert(raw, Directory),
    )
#############################################################################
# pydantic V2 #
#############################################################################
class FileModel(pydantic.BaseModel):
    """pydantic (V2 API) model for a file node."""

    # Literal marker with a default; only the value "file" is accepted.
    type: Literal["file"] = "file"
    # Non-empty name (min_length=1).
    name: str = pydantic.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Size in bytes; must not be negative.
    nbytes: pydantic.NonNegativeInt
class DirectoryModel(pydantic.BaseModel):
    """pydantic (V2 API) model for a directory node."""

    # Literal marker with a default; only the value "directory" is accepted.
    type: Literal["directory"] = "directory"
    # Non-empty name (min_length=1).
    name: str = pydantic.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Children: any mix of (recursively) directories and files.
    contents: List[Union[DirectoryModel, FileModel]]
def bench_pydantic_v2(data):
    """Benchmark the pydantic V2 API on ``data`` using ``DirectoryModel``.

    Returns the ``(dumps_time, loads_time)`` tuple produced by ``bench``.
    """

    def to_json(model):
        # Serialize a model instance to a JSON string.
        return model.model_dump_json()

    def build(raw):
        # Construct (and validate) the model from the plain dict.
        return DirectoryModel(**raw)

    return bench(data, to_json, DirectoryModel.model_validate_json, build)
#############################################################################
# pydantic V1 #
#############################################################################
class FileModelV1(pydantic.v1.BaseModel):
    """pydantic V1-API (``pydantic.v1``) model for a file node."""

    # Literal marker with a default; only the value "file" is accepted.
    type: Literal["file"] = "file"
    # Non-empty name (min_length=1).
    name: str = pydantic.v1.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Size in bytes; must not be negative.
    nbytes: pydantic.v1.NonNegativeInt
class DirectoryModelV1(pydantic.v1.BaseModel):
    """pydantic V1-API (``pydantic.v1``) model for a directory node."""

    # Literal marker with a default; only the value "directory" is accepted.
    type: Literal["directory"] = "directory"
    # Non-empty name (min_length=1).
    name: str = pydantic.v1.Field(min_length=1)
    created_by: uuid.UUID
    created_at: datetime.datetime
    updated_at: datetime.datetime
    # Children: any mix of (recursively) directories and files.
    contents: List[Union[DirectoryModelV1, FileModelV1]]
def bench_pydantic_v1(data):
    """Benchmark the pydantic V1 API on ``data`` using ``DirectoryModelV1``.

    Returns the ``(dumps_time, loads_time)`` tuple produced by ``bench``.
    """

    def to_json(model):
        # Serialize a model instance to a JSON string.
        return model.json()

    def build(raw):
        # Construct (and validate) the model from the plain dict.
        return DirectoryModelV1(**raw)

    return bench(data, to_json, DirectoryModelV1.parse_raw, build)
if __name__ == "__main__":
    # Build one dataset and reuse it for every library under test.
    N = 1000
    data = make_filesystem_data(N)

    # msgspec runs first; its timings are the baseline for the ratios below.
    ms_dumps, ms_loads = bench_msgspec(data)
    ms_total = ms_dumps + ms_loads

    title = f"msgspec {msgspec.__version__}"
    print(title, "-" * len(title), sep="\n")
    print(
        f"dumps: {ms_dumps * 1e6:.1f} us",
        f"loads: {ms_loads * 1e6:.1f} us",
        f"total: {ms_total * 1e6:.1f} us",
        sep="\n",
    )

    # Each remaining library is reported relative to the msgspec baseline.
    competitors = [
        (f"pydantic {pydantic.__version__}", bench_pydantic_v2),
        (f"pydantic {pydantic.v1.__version__}", bench_pydantic_v1),
    ]
    for title, func in competitors:
        # Blank separator line, heading, then an underline of equal width.
        print("", title, "-" * len(title), sep="\n")
        dumps, loads = func(data)
        total = dumps + loads
        print(
            f"dumps: {dumps * 1e6:.1f} us ({dumps / ms_dumps:.1f}x slower)",
            f"loads: {loads * 1e6:.1f} us ({loads / ms_loads:.1f}x slower)",
            f"total: {total * 1e6:.1f} us ({total / ms_total:.1f}x slower)",
            sep="\n",
        )
@samuelcolvin
Copy link

Up to you, just making the observation really.

@legraphista
Copy link

Quick update on the numbers now that pydantic v2 has become stable:

msgspec 0.16.0
--------------
dumps: 179.3 us
loads: 477.0 us
total: 656.3 us

pydantic 2.0.1
--------------
dumps: 4292.0 us (23.9x slower)
loads: 6666.6 us (14.0x slower)
total: 10958.6 us (16.7x slower)

pydantic 1.10.11
----------------
dumps: 24176.3 us (134.8x slower)
loads: 73471.1 us (154.0x slower)
total: 97647.4 us (148.8x slower)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment