Skip to content

Instantly share code, notes, and snippets.

@AlexanderNenninger
Created April 18, 2024 09:15
Show Gist options
  • Save AlexanderNenninger/03dc3f4a8ef794a6b5f438077040b55c to your computer and use it in GitHub Desktop.
Save AlexanderNenninger/03dc3f4a8ef794a6b5f438077040b55c to your computer and use it in GitHub Desktop.
Serialize and Deserialize Python Objects From Different Modules While Never Importing Both at the Same Time

Serialize and Deserialize Python Objects From Different Modules While Never Importing Both at the Same Time

Why?

I had a Kubeflow pipeline where module size was limited. One can either resort to type-unsafe data conversion or depend on a third module for data schemas.

How?

We define functionality and data in separate Python modules. While functionality gets split into different modules, data definitions are placed in a common module. This keeps down the size of the code that needs to be sent to pipeline workers. If data_models becomes too big, e.g. if there are more classes that can be converted into each other, it can be split up into foo_data_model and bar_data_model.

Running the Example

python foo.py # Foo(1)
python bar.py # Bar('1')
from __future__ import annotations
import os
import pickle
import data_models
class Bar:
    """Wrapper around a ``data_models.BarData`` payload.

    The payload is kept on ``self.data``; its ``x`` attribute is what
    ``__str__`` displays.
    """

    def __init__(self, x: str | data_models.BarData):
        """Store ``x`` as the payload.

        Args:
            x: Either a ready-made ``BarData`` (kept as is) or any other
                value, which is wrapped in a new ``BarData``.
        """
        if isinstance(x, data_models.BarData):
            self.data = x
        else:
            self.data = data_models.BarData(x)

    def __str__(self):
        """Return ``Bar(...)`` containing the repr of the payload's ``x``."""
        return f"Bar({self.data.x!r})"

    @classmethod
    def load(cls, file: str | os.PathLike) -> Bar:
        """Build an instance from a pickle file.

        Implemented as a classmethod so that subclasses get instances of
        their own type; ``Bar.load(path)`` behaves as before.

        Args:
            file: Path of the pickle file; plain strings and path-like
                objects are both accepted by ``open``.

        Returns:
            A new instance wrapping the unpickled object.
        """
        # SECURITY: unpickling can execute arbitrary code. Only load files
        # that were written by a trusted producer.
        with open(file, "rb") as f:
            data: data_models.BarData = pickle.load(f)
        return cls(data)
if __name__ == "__main__":
    # Rebuild a Bar from the pickle file and display it.
    restored = Bar.load("bar_data.pkl")
    print(restored)  # Bar('1')
class FooData:
    """Plain container holding a single value in attribute ``x``."""

    def __init__(self, x: int) -> None:
        """Keep ``x`` unchanged on the instance."""
        self.x = x
class BarData:
    """Plain container holding a single value in attribute ``x``."""

    def __init__(self, x: str) -> None:
        """Keep ``x`` unchanged on the instance."""
        self.x = x
import os
import pickle
import data_models
class Foo:
    """Wrapper around a ``data_models.FooData`` payload.

    The payload is kept on ``self.data``. It can be converted to a
    ``data_models.BarData`` and pickled to a file.
    """

    def __init__(self, x: int | data_models.FooData):
        """Store ``x`` as the payload.

        Args:
            x: Either a ready-made ``FooData`` (kept as is) or any other
                value, which is wrapped in a new ``FooData``.
        """
        # A bare dotted name in a ``case`` clause is a value pattern, i.e. an
        # ``==`` comparison against the class object itself, so it never
        # matches a FooData instance. An isinstance check performs the
        # intended type test.
        if isinstance(x, data_models.FooData):
            self.data = x
        else:
            self.data = data_models.FooData(x)

    def __str__(self):
        """Return ``Foo(...)`` containing the repr of the payload's ``x``."""
        return f"Foo({self.data.x!r})"

    def to_bar_data(self):
        """Return a new ``BarData`` holding ``str()`` of this payload's ``x``."""
        return data_models.BarData(str(self.data.x))

    def export_to_bar(self, file: str | os.PathLike):
        """Pickle the ``BarData`` form of this object to ``file``.

        Args:
            file: Destination path; plain strings and path-like objects are
                both accepted by ``open``. The file is opened in ``"wb"``
                mode, so existing content is overwritten.
        """
        with open(file, "wb") as f:
            pickle.dump(self.to_bar_data(), f)
if __name__ == "__main__":
    # Create a Foo, display it, then write its BarData form to disk.
    source = Foo(1)
    print(source)  # Foo(1)
    source.export_to_bar("bar_data.pkl")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment