Skip to content

Instantly share code, notes, and snippets.

@thomasaarholt
Last active November 23, 2023 19:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thomasaarholt/d81c0cca779978b2c0a9ee91b5f94f85 to your computer and use it in GitHub Desktop.
Save thomasaarholt/d81c0cca779978b2c0a9ee91b5f94f85 to your computer and use it in GitHub Desktop.
Structlog example showing how to serialize polars and pandas dataframes and pydantic models
from typing import Any
import json
import structlog
import pandas as pd # pip install pandas
import polars as pl # pip install polars
from pydantic import BaseModel
class PydanticModel(BaseModel):
    """Minimal Pydantic model used as a sample payload in this logging demo."""

    # Integer field.
    a: int
    # String field.
    b: str
# Sample payloads for the logging calls further down: a polars frame and a
# pandas frame holding the same two columns, plus one Pydantic model instance.
df = pl.DataFrame(
    data={
        "a": [1, 2, 3],
        "b": ["a", "b", "c"],
    },
)
df2 = pd.DataFrame(
    data={
        "a": [1, 2, 3],
        "b": ["a", "b", "c"],
    },
)
pydantic_model = PydanticModel(a=1, b="a")
def default(obj: pl.DataFrame | pd.DataFrame | BaseModel) -> dict[str, Any]:
    """Convert a DataFrame or Pydantic model into JSON-serializable data.

    Meant to be passed as the ``default=`` argument of
    ``structlog.processors.JSONRenderer``.

    Args:
        obj: A polars DataFrame, a pandas DataFrame, or a Pydantic model.

    Returns:
        For DataFrames, the result of decoding the frame's own JSON export;
        for Pydantic models, the model dumped in JSON mode.

    Raises:
        TypeError: If ``obj`` is not an instance of a supported type.
    """
    # Ordered (type, converter) pairs; the first matching type wins.
    converters = (
        (pl.DataFrame, lambda frame: json.loads(frame.write_json())),
        (pd.DataFrame, lambda frame: json.loads(frame.to_json())),
        (BaseModel, lambda model: model.model_dump(mode="json")),
    )
    for supported_type, convert in converters:
        if isinstance(obj, supported_type):
            return convert(obj)

    # Nothing matched: fail loudly and point at where to add support.
    raise TypeError(
        f"Object of type {obj.__class__.__name__} is not JSON serializable. "
        "Add entries to the `default` function to support this type."
    )
# BAD
# Without a `default=` hook, each logged object shows up in the JSON output
# as its __repr__ string (see the output at the end of this section).
processors = [structlog.processors.JSONRenderer()]
structlog.configure(processors=processors)
log = structlog.stdlib.get_logger()
# Log each sample payload under the same "df" key, one event per payload.
for event_name, payload in (
    ("polars", df),
    ("pandas", df2),
    ("pydantic", pydantic_model),
):
    log.info(event_name, df=payload)
# Output, one line per call:
# {"df": "shape: (3, 2)\n\u250c\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 a \u2506 b \u2502\n\u2502 --- \u2506 --- \u2502\n\u2502 i64 \u2506 str \u2502\n\u255e\u2550\u2550\u2550\u2550\u2550\u256a\u2550\u2550\u2550\u2550\u2550\u2561\n\u2502 1 \u2506 a \u2502\n\u2502 2 \u2506 b \u2502\n\u2502 3 \u2506 c \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2534\u2500\u2500\u2500\u2500\u2500\u2518", "event": "polars"}
# {"df": " a b\n0 1 a\n1 2 b\n2 3 c", "event": "pandas"}
# {"df": "PydanticModel(a=1, b='a')", "event": "pydantic"}
# GOOD
# With `default=default`, the renderer receives properly serializable data
# for each object, so the JSON output is structured instead of a repr string.
processors = [structlog.processors.JSONRenderer(default=default)]
structlog.configure(processors=processors)
# The `log` object created earlier is reused after reconfiguring.
for event_name, payload in (
    ("polars", df),
    ("pandas", df2),
    ("pydantic", pydantic_model),
):
    log.info(event_name, df=payload)
# Output, one line per call:
# {"df": {"columns": [{"name": "a", "datatype": "Int64", "bit_settings": "", "values": [1, 2, 3]}, {"name": "b", "datatype": "Utf8", "bit_settings": "", "values": ["a", "b", "c"]}]}, "event": "polars"}
# {"df": {"a": {"0": 1, "1": 2, "2": 3}, "b": {"0": "a", "1": "b", "2": "c"}}, "event": "pandas"}
# {"df": {"a": 1, "b": "a"}, "event": "pydantic"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment