Skip to content

Instantly share code, notes, and snippets.

@jcrist
Created January 31, 2022 20:58
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcrist/782c463f9eb52211e255caa96f57ab97 to your computer and use it in GitHub Desktop.
Save jcrist/782c463f9eb52211e255caa96f57ab97 to your computer and use it in GitHub Desktop.
A (naive) benchmark comparing pydantic & msgspec performance
"""
This benchmark is a modified version of the benchmark available at
https://github.com/samuelcolvin/pydantic/tree/master/benchmarks to support
benchmarking msgspec.
The benchmark measures the time to JSON encode/decode `n` random objects
matching a specific schema. It compares the time required for both
serialization _and_ schema validation.
"""
import argparse
import random
import string
import time
from functools import partial
from datetime import datetime
from typing import List, Optional
import orjson
import pydantic
import msgspec
# Whitespace and ASCII punctuation characters mixed into the random-string corpus.
PUNCTUATION = " \t\n!\"#$%&'()*+,-./"
# Upper- and lower-case ASCII letters.
LETTERS = string.ascii_letters
UNICODE = "\xa0\xad¡¢£¤¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
# Default corpus for rand_string: PUNCTUATION is repeated 5 times and LETTERS
# 20 times, so sampling uniformly from this string picks letters most often.
ALL = PUNCTUATION * 5 + LETTERS * 20 + UNICODE
def rand_string(min_length, max_length, corpus=ALL):
    """Return a random string whose characters are sampled from ``corpus``.

    Characters are drawn with replacement. The length comes from
    ``random.randrange(min_length, max_length)``, so ``max_length`` itself is
    never reached.
    """
    length = random.randrange(min_length, max_length)
    chars = random.choices(corpus, k=length)
    return "".join(chars)
# Sentinel marking a field that should be dropped from the generated payload.
MISSING = object()


def null_missing_v(f, null_chance=0.2, missing_chance=None):
    """Return ``None``, ``MISSING`` or ``f()`` according to the given odds.

    A single uniform draw ``r`` in ``[0, 1)`` is split into three bands:

    * ``r < null_chance``                          -> ``None``
    * ``null_chance <= r < null_chance + missing`` -> ``MISSING``
    * otherwise                                    -> ``f()``

    Args:
        f: Zero-argument callable producing the real value.
        null_chance: Probability of returning ``None``.
        missing_chance: Probability of returning ``MISSING``; defaults to
            ``null_chance`` when ``None``.

    Returns:
        ``None``, the ``MISSING`` sentinel, or whatever ``f()`` returns.
    """
    if missing_chance is None:
        missing_chance = null_chance
    # Use one draw for both thresholds. Drawing a second random number for the
    # null check would make the bands overlap and skew the MISSING probability.
    r = random.random()
    if r < null_chance:
        return None
    if r < null_chance + missing_chance:
        return MISSING
    return f()
def null_missing_string(*args, **kwargs):
    """Return a random string, ``None`` or ``MISSING``.

    Positional arguments are forwarded to ``rand_string``; keyword arguments
    are forwarded to ``null_missing_v``.
    """

    def make_string():
        return rand_string(*args)

    return null_missing_v(make_string, **kwargs)
def rand_date():
    """Return a random timestamp string such as ``1987-03-14T09:26:53Z``.

    The year is in 1900-2019, the month in 1-12 and the day in 1-28. The day
    is capped at 28 so that every generated date is valid in every month.
    """
    # randint is inclusive at both ends. The exclusive upper bounds of
    # randrange previously meant month 12 and day 28 were never produced.
    pick = random.randint
    year = pick(1900, 2019)
    month = pick(1, 12)
    day = pick(1, 28)
    hour = pick(0, 23)
    minute = pick(0, 59)
    second = pick(0, 59)
    return f"{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z"
def remove_missing(d):
    """Return a copy of ``d`` with every dict entry valued ``MISSING`` removed.

    Dicts and lists are processed recursively; any other value is returned
    unchanged.
    """
    if isinstance(d, list):
        return [remove_missing(item) for item in d]
    if not isinstance(d, dict):
        return d
    cleaned = {}
    for key, value in d.items():
        if value is MISSING:
            continue
        cleaned[key] = remove_missing(value)
    return cleaned
def generate_case():
    """Build one random client record as a plain dict.

    ``client_phone``, ``upstream_http_referrer`` and ``grecaptcha_response``
    may come back as ``None`` or be absent altogether; absent fields are
    stripped by ``remove_missing`` before the record is returned. Each record
    carries between one and four skills.
    """
    case = {
        "id": random.randrange(1, 2000),
        "client_name": rand_string(10, 280),
        "sort_index": random.random() * 200,
        "client_phone": null_missing_string(5, 15),
        "location": {
            "latitude": random.random() * 180 - 90,
            "longitude": random.random() * 180,
        },
        "contractor": random.randrange(-100, 2000),
        "upstream_http_referrer": null_missing_string(10, 1050),
        "grecaptcha_response": null_missing_string(
            10, 1050, null_chance=0.05, missing_chance=0.05
        ),
        "last_updated": rand_date(),
        "skills": [
            {
                "subject": rand_string(5, 20),
                "subject_id": i,
                "category": rand_string(5, 20),
                "qual_level": rand_string(5, 20),
                "qual_level_id": random.randrange(2000),
                "qual_level_ranking": random.random() * 20,
            }
            for i in range(random.randrange(1, 5))
        ],
    }
    return remove_missing(case)
class BenchPydantic:
    """Benchmark target: orjson handles the JSON, pydantic handles the schema."""

    name = "pydantic + orjson"

    def __init__(self):
        # The schema models are declared locally; only the top-level model is
        # kept on the instance.
        class Location(pydantic.BaseModel):
            latitude: float
            longitude: float

        class Skill(pydantic.BaseModel):
            subject: str
            subject_id: int
            category: str
            qual_level: str
            qual_level_id: int
            qual_level_ranking: float = 0

        class Client(pydantic.BaseModel):
            id: int
            client_name: str
            sort_index: float
            client_phone: Optional[str] = None
            location: Optional[Location] = None
            contractor: Optional[int] = None
            upstream_http_referrer: Optional[str] = None
            grecaptcha_response: Optional[str] = None
            last_updated: Optional[datetime] = None
            skills: List[Skill] = []

        class Model(pydantic.BaseModel):
            clients: List[Client]

        self.model = Model

    def decode(self, msg):
        """Parse JSON ``msg`` with orjson, then load it into the model."""
        return self.model.parse_obj(orjson.loads(msg))

    def encode(self, obj):
        """Convert model instance ``obj`` to a dict and serialize it with orjson."""
        return orjson.dumps(obj.dict())
class BenchMsgspec:
    """Benchmark target: msgspec does both the JSON work and the schema typing."""

    name = "msgspec"

    def __init__(self):
        # The schema structs are declared locally; only the typed decoder and
        # a reusable encoder are kept on the instance.
        class Location(msgspec.Struct):
            latitude: float
            longitude: float

        class Skill(msgspec.Struct):
            subject: str
            subject_id: int
            category: str
            qual_level: str
            qual_level_id: int
            qual_level_ranking: float = 0

        class Client(msgspec.Struct):
            id: int
            client_name: str
            sort_index: float
            client_phone: Optional[str] = None
            location: Optional[Location] = None
            contractor: Optional[int] = None
            upstream_http_referrer: Optional[str] = None
            grecaptcha_response: Optional[str] = None
            last_updated: Optional[datetime] = None
            skills: List[Skill] = []

        class Model(msgspec.Struct):
            clients: List[Client]

        self.decoder = msgspec.json.Decoder(Model)
        self.encoder = msgspec.json.Encoder()

    def decode(self, msg):
        """Decode JSON ``msg`` into a ``Model`` instance."""
        decoder = self.decoder
        return decoder.decode(msg)

    def encode(self, obj):
        """Encode ``obj`` to JSON bytes."""
        encoder = self.encoder
        return encoder.encode(obj)
def _estimate_rounds(n):
    """Pick a repeat count: fewer rounds for larger payloads, never below one."""
    # Clamp both ends: n > 10000 would otherwise give 0 rounds (and a later
    # division by zero), and n <= 0 would divide by zero right here.
    return max(1, min(1000, 10000 // max(n, 1)))


def _mean_seconds(func, arg, rounds):
    """Return the mean wall-clock seconds per ``func(arg)`` call over ``rounds`` calls."""
    start = time.perf_counter()
    for _ in range(rounds):
        func(arg)
    return (time.perf_counter() - start) / rounds


def main(n):
    """Benchmark JSON decoding and encoding of ``n`` random client records.

    One payload is generated and shared by every implementation. For each
    one, the mean decode and encode time per round is printed in
    milliseconds, followed by their sum.

    Args:
        n: Number of random client objects in the benchmark payload.
    """
    rounds = _estimate_rounds(n)
    print(f"Benchmarking JSON encoding/decoding performance ({n} objects)")
    data = orjson.dumps({"clients": [generate_case() for _ in range(n)]})
    for bench in [BenchPydantic(), BenchMsgspec()]:
        print(f"* {bench.name}:")
        dec_time = _mean_seconds(bench.decode, data, rounds)
        print(f" - Decoding: {(dec_time * 1e3):.3f} ms")
        # Decode once outside the timed region to get an object to encode.
        obj = bench.decode(data)
        enc_time = _mean_seconds(bench.encode, obj, rounds)
        print(f" - Encoding: {(enc_time * 1e3):.3f} ms")
        total = enc_time + dec_time
        print(f" - Total: {(total * 1e3):.3f} ms")
if __name__ == "__main__":
    # Command-line entry point: read the object count from -n and run the benchmark.
    cli = argparse.ArgumentParser(prog="bench-pydantic", description=__doc__)
    cli.add_argument("-n", type=int, default=1000, help="How many objects to bench")
    main(cli.parse_args().n)
@jcrist
Copy link
Author

jcrist commented Jan 31, 2022

Results on my machine:

$ python bench.py
Benchmarking JSON encoding/decoding performance (1000 objects)
* pydantic + orjson:
  - Decoding: 55.889 ms
  - Encoding: 37.665 ms
  - Total: 93.553 ms
* msgspec:
  - Decoding: 5.602 ms
  - Encoding: 1.587 ms
  - Total: 7.190 ms

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment