Last active
May 11, 2023 10:20
-
-
Save ianlcassidy/ab315416fab3b3302ba4f6e21b75242b to your computer and use it in GitHub Desktop.
A function to automatically convert a dataframe to a Pydantic features container
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import typing as T | |
import pandas as pd | |
import numpy as np | |
def create_features_container_from_dataframe(
    df: pd.DataFrame,
    class_name: str = "Features",
) -> str:
    """Generate Python source code for a Pydantic model describing ``df``.

    One optional, strictly-typed field is emitted per dataframe column.
    Integer columns become ``conint`` fields and every other column becomes a
    ``confloat`` field; each field is bounded by the column's observed
    minimum and maximum. The generated class also carries helper methods for
    setting one-hot encoded groups, bulk assignment, and exporting the values
    as a 2-D NumPy array.

    Args:
        df: Dataframe whose column names and value ranges define the fields.
            Column names are written verbatim as attribute names.
        class_name: Name given to the generated model class.

    Returns:
        The source code of the generated module as a single string.
    """
    header = f"""import typing as T

import numpy as np
from pydantic import BaseModel
from pydantic.types import conint, confloat


class FeatureIsNoneError(Exception):
    pass


class {class_name}(BaseModel):
"""

    field_lines = []
    for col, dtype in df.dtypes.items():
        # Test against the whole integer family (any width, signed or
        # unsigned, including pandas nullable integers) rather than a single
        # concrete dtype, so e.g. uint8 one-hot columns become conint.
        confunc = "conint" if pd.api.types.is_integer_dtype(dtype) else "confloat"

        column = df[col]
        constraints = ["strict=True"]
        # min()/max() yield NaN for empty or all-NaN columns; a literal `nan`
        # is an undefined name in the generated module, so skip that bound.
        for keyword, bound in (("ge", column.min()), ("le", column.max())):
            if not pd.isna(bound):
                constraints.append(f"{keyword}={bound}")

        field_lines.append(
            f"    {col}: T.Optional[{confunc}({', '.join(constraints)})] = None\n"
        )

    # Plain (non-f) string: the braces below belong to f-strings that are
    # evaluated inside the generated code, not here.
    methods = """
    class Config:
        validate_assignment = True

    def set_categorical_features(
        self,
        prefix: str,
        positive_category: T.Union[str, int],
        sep: str = "_"
    ):
        found = False
        for field in self.__annotations__:
            if field == f"{prefix}{sep}{positive_category}":
                setattr(self, field, 1)
                found = True
                continue
            if field.startswith(f"{prefix}{sep}"):
                setattr(self, field, 0)
        if not found:
            raise ValueError(
                f"Could not find the positive category {prefix}{sep}{positive_category}"
            )

    def set_bulk_features(self, mapping: T.Dict):
        for field, value in mapping.items():
            setattr(self, field, value)

    @property
    def numpy_array(self) -> np.ndarray:
        vals = []
        for field in self.__annotations__:
            attr = getattr(self, field)
            if attr is None:
                raise FeatureIsNoneError(f"{field} value cannot be None")
            vals.append(attr)
        return np.array([vals])
"""
    return header + "".join(field_lines) + methods
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment