Skip to content

Instantly share code, notes, and snippets.

@jcrist
Created November 28, 2023 21:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcrist/35ddeced755402894c4436168ebe2314 to your computer and use it in GitHub Desktop.
Save jcrist/35ddeced755402894c4436168ebe2314 to your computer and use it in GitHub Desktop.
A quick benchmark of msgspec vs mashumaro to clear up misconceptions in a flyte issue
import sys
import importlib.metadata
import timeit
from dataclasses import dataclass
import msgspec
import orjson
from mashumaro.codecs.json import JSONEncoder, JSONDecoder
from mashumaro.codecs.orjson import ORJSONEncoder, ORJSONDecoder
# Dataclass type definitions, copied from
# https://flyte.org/blog/flyte-1-10-monorepo-new-agents-eager-workflows-and-more#mashumaro-to-serializedeserialize-dataclasses
@dataclass
class CurrencyPosition:
    """A cash holding: a currency code and the balance held in it."""

    currency: str
    balance: float
@dataclass
class StockPosition:
    """A stock holding: ticker symbol, display name and integer balance."""

    ticker: str
    name: str
    balance: int
@dataclass
class OP:
    """Top-level benchmark message: lists of currency and stock positions."""

    currencies: list[CurrencyPosition]
    stocks: list[StockPosition]
# Identical types to those above, but this time defined as msgspec structs
class CurrencyPositionStruct(msgspec.Struct):
    """msgspec.Struct counterpart of the ``CurrencyPosition`` dataclass."""

    currency: str
    balance: float
class StockPositionStruct(msgspec.Struct):
    """msgspec.Struct counterpart of the ``StockPosition`` dataclass."""

    ticker: str
    name: str
    balance: int
class OPStruct(msgspec.Struct):
    """msgspec.Struct counterpart of the ``OP`` dataclass."""

    currencies: list[CurrencyPositionStruct]
    stocks: list[StockPositionStruct]
# Sample message used by every benchmark case, built from the dataclass types.
# The values are taken from the same flyte blog post as the type definitions.
dataclass_obj = OP(
    currencies=[
        CurrencyPosition(currency="USD", balance=238.67),
        CurrencyPosition(currency="EUR", balance=361.84),
    ],
    stocks=[
        StockPosition(ticker="AAPL", name="Apple", balance=10),
        StockPosition(ticker="AMZN", name="Amazon", balance=10),
    ],
)
# The same message again, this time expressed with the msgspec struct types.
struct_obj = msgspec.convert(
    dataclass_obj,
    OPStruct,
    from_attributes=True,
)

# JSON-encoded form of the message; this is the input for all decode cases.
# NOTE: `json` here names the encoded payload, not the stdlib `json` module.
json = msgspec.json.encode(struct_obj)
# Encoder/decoder objects are created once, up front, so the timed calls in
# the benchmark cases only invoke their encode()/decode() methods.

# mashumaro codecs for the dataclass message type
mashumaro_json_encoder = JSONEncoder(OP)
mashumaro_json_decoder = JSONDecoder(OP)
mashumaro_orjson_encoder = ORJSONEncoder(OP)
mashumaro_orjson_decoder = ORJSONDecoder(OP)

# msgspec decoders, one per target type
msgspec_dataclass_decoder = msgspec.json.Decoder(OP)
msgspec_struct_decoder = msgspec.json.Decoder(OPStruct)

# a single msgspec encoder is used for both dataclass and struct inputs
msgspec_json_encoder = msgspec.json.Encoder()
# (label, zero-argument callable) pairs timed in the "Encoding:" group.
encode_cases = [
    (
        "mashumaro & json",
        lambda: mashumaro_json_encoder.encode(dataclass_obj),
    ),
    (
        "mashumaro & orjson",
        lambda: mashumaro_orjson_encoder.encode(dataclass_obj),
    ),
    (
        "msgspec & dataclasses",
        lambda: msgspec_json_encoder.encode(dataclass_obj),
    ),
    (
        "msgspec & structs",
        lambda: msgspec_json_encoder.encode(struct_obj),
    ),
]
# (label, zero-argument callable) pairs timed in the "Decoding:" group.
# Every case decodes the same pre-encoded `json` payload.
decode_cases = [
    (
        "mashumaro & json",
        lambda: mashumaro_json_decoder.decode(json),
    ),
    (
        "mashumaro & orjson",
        lambda: mashumaro_orjson_decoder.decode(json),
    ),
    (
        "msgspec & dataclasses",
        lambda: msgspec_dataclass_decoder.decode(json),
    ),
    (
        "msgspec & structs",
        lambda: msgspec_struct_decoder.decode(json),
    ),
]
def run_benchmarks_and_print_results():
    """Time every encode/decode case and print a ranked table per group.

    For each group (``encode_cases`` then ``decode_cases``) every case is
    timed with ``timeit``, the cases are sorted fastest-first, and each is
    printed as microseconds per call together with its slowdown relative to
    the fastest case in the group.  The versions of Python, msgspec,
    mashumaro and orjson are printed at the end.
    """
    for title, cases in [("Encoding:", encode_cases), ("Decoding:", decode_cases)]:
        print(title)
        results = []
        for case, func in cases:
            func()  # Call once to warmup
            timer = timeit.Timer("func()", globals={"func": func})
            # autorange() is only used to pick a suitable loop count; the
            # reported figure is the fastest of several repeats, since the
            # minimum is the sample least disturbed by other system activity.
            n, _ = timer.autorange()
            best_total = min(timer.repeat(number=n))
            results.append((case, best_total / n))
        # Format a nice results table
        results.sort(key=lambda x: x[1])
        best = results[0][1]
        for case, per_call in results:
            print(
                f"- {case + ':':22} {1_000_000 * per_call:.1f} µs "
                f"({per_call / best:.1f}x)"
            )
        print("")
    print("Library versions:")
    # sys.version starts with the plain version number (e.g. "3.11.0");
    # joining sys.version_info instead would print "3.11.0.final.0".
    print(f"- Python: {sys.version.split()[0]}")
    print(f"- msgspec: {msgspec.__version__}")
    print(f"- mashumaro: {importlib.metadata.version('mashumaro')}")
    print(f"- orjson: {orjson.__version__}")
# Run the benchmarks only when this file is executed as a script, so that
# importing it does not start the timing runs as a side effect.
if __name__ == "__main__":
    run_benchmarks_and_print_results()
@jcrist
Copy link
Author

jcrist commented Nov 28, 2023

This was a quick benchmark written to clear up a performance misconception raised in a flyte issue.

Results:

$ python bench.py 
Encoding:
- msgspec & structs:     0.4 µs (1.0x)
- msgspec & dataclasses: 0.7 µs (1.6x)
- mashumaro & orjson:    1.3 µs (3.3x)
- mashumaro & json:      5.1 µs (12.3x)

Decoding:
- msgspec & structs:     0.7 µs (1.0x)
- msgspec & dataclasses: 1.3 µs (2.0x)
- mashumaro & orjson:    2.6 µs (3.9x)
- mashumaro & json:      5.2 µs (7.7x)

Library versions:
- Python: 3.11.0.final.0
- msgspec: 0.18.4
- mashumaro: 3.11
- orjson: 3.9.10

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment