Skip to content

Instantly share code, notes, and snippets.

@titouanfreville
Created April 13, 2024 09:45
Show Gist options
  • Save titouanfreville/6701dcaa1eb48c0e8e32bd77df0e900d to your computer and use it in GitHub Desktop.
Save titouanfreville/6701dcaa1eb48c0e8e32bd77df0e900d to your computer and use it in GitHub Desktop.
Simple performance test: converting pydantic models to a pandas DataFrame
import cProfile
import pandas as pd
from pydantic import BaseModel
class SomeModel(BaseModel):
    """Two-field pydantic model used as the row type for the benchmark."""

    # Integer column of the resulting DataFrame.
    col1: int
    # String column of the resulting DataFrame.
    col2: str
def _iter_py_dict(data):
    """Yield ``item.dict()`` for every item of *data*.

    NOTE(review): pydantic v2 deprecates ``.dict()`` in favour of
    ``.model_dump()`` -- confirm against the installed pydantic version.
    """
    for item in data:
        yield item.dict()


def _iter_vars(data):
    """Yield ``vars(item)`` for every item of *data* (the live attribute dict, not a copy)."""
    for item in data:
        yield vars(item)


def _iter_dict(data):
    """Yield ``dict(item)`` for every item of *data* (a new dict built by iterating the item)."""
    for item in data:
        yield dict(item)


def _iter_internal_dict(data):
    """Yield ``item.__dict__`` for every item of *data* (the live attribute dict, not a copy)."""
    for item in data:
        yield item.__dict__


# Maps the public method name to the generator function implementing it.
_GENERATOR_METHODS = {
    "py_dict": _iter_py_dict,
    "vars": _iter_vars,
    "dict": _iter_dict,
    "internal_dict": _iter_internal_dict,
}


def getGeneratorMethod(method: str):
    """Return the generator function that converts items to dicts via *method*.

    Args:
        method: One of ``"py_dict"`` (``item.dict()``), ``"vars"``
            (``vars(item)``), ``"dict"`` (``dict(item)``) or
            ``"internal_dict"`` (``item.__dict__``).

    Returns:
        A generator function taking an iterable ``data`` and lazily yielding
        one dict per item.

    Raises:
        ValueError: If *method* is not one of the supported names.  The
            previous implementation fell through its ``match`` statement and
            failed with an ``UnboundLocalError`` instead.
    """
    try:
        return _GENERATOR_METHODS[method]
    except KeyError:
        expected = ", ".join(_GENERATOR_METHODS)
        raise ValueError(
            f"unknown method {method!r}; expected one of: {expected}"
        ) from None
# Benchmark input: the same two model instances repeated, 800,000 rows in total.
data = [
    SomeModel(col1=1, col2="foo"),
    SomeModel(col1=2, col2="bar"),
] * 4 * 10**5

# Build one DataFrame up front and print it before any profiling starts.
print(pd.DataFrame(getGeneratorMethod("internal_dict")(data)))

# Each entry pairs a heading to print with the statements handed to
# cProfile.run, in order.  The trailing comments are the timing notes carried
# over from the original script.
_PROFILE_PLAN = (
    (
        ">>> Profile transform method",
        (
            "getGeneratorMethod('internal_dict')",  # Free
        ),
    ),
    (
        ">>> Profile using list comprehension",
        (
            "pd.DataFrame([s.dict() for s in data])",  # around 9.5s
            "pd.DataFrame([s.__dict__ for s in data])",  # around 1.1s
            "pd.DataFrame([dict(s) for s in data])",  # around 9.8s
            "pd.DataFrame([vars(s) for s in data])",  # 1.6s
        ),
    ),
    (
        ">>> Profile using generators",
        (
            "pd.DataFrame(getGeneratorMethod('py_dict')(data))",  # around 8.1s
            "pd.DataFrame(getGeneratorMethod('internal_dict')(data))",  # around 1.2s
            "pd.DataFrame(getGeneratorMethod('dict')(data))",  # around 10s
            "pd.DataFrame(getGeneratorMethod('vars')(data))",  # 1.7s
        ),
    ),
)

for _heading, _statements in _PROFILE_PLAN:
    print(_heading)
    for _statement in _statements:
        # cProfile.run evaluates the string in the __main__ namespace, so the
        # module-level names (pd, data, getGeneratorMethod) must stay global.
        cProfile.run(_statement)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment