Skip to content

Instantly share code, notes, and snippets.

@RyanJulyan
Last active May 31, 2024 05:33
Show Gist options
  • Save RyanJulyan/4b094b7c510409586c5d1cb24c7fc5a4 to your computer and use it in GitHub Desktop.
Save RyanJulyan/4b094b7c510409586c5d1cb24c7fc5a4 to your computer and use it in GitHub Desktop.
Generates fake data instances using attrs and pydantic models, emulates bad/corrupted data, and builds synthetic time series with seasonality, trend, and adjustments.
from typing import Any, get_args, get_origin, Type, Dict, Optional, List
import random
from datetime import date, datetime
import attr
from faker import Faker
def data_factory(
    cls: Type,
    num_records: int,
    custom_attribute_mapping: Optional[Dict[str, str]] = None,
    seed: Optional[int] = None,
    list_min: int = 0,
    list_max: int = 10,
) -> List:
    """
    Generate ``num_records`` fake instances of an attrs class using Faker.

    Output is reproducible when a ``seed`` is provided: both Faker and the
    ``random`` module (used for list lengths) are seeded.

    Args:
        cls (Type): The attrs-decorated class to instantiate.
        num_records (int): The number of records to generate.
        custom_attribute_mapping (Optional[Dict[str, str]]): Mapping from
            attribute names to Faker method names; takes precedence over the
            type-based mapping. Defaults to None (empty mapping).
        seed (Optional[int]): Seed for reproducible output. Defaults to None.
        list_min (int): Minimum number of elements generated for list
            attributes. Defaults to 0.
        list_max (int): Maximum number of elements generated for list
            attributes. Defaults to 10.

    Returns:
        List: A list of ``num_records`` instances of ``cls``.
    """
    # Copy into a fresh dict: a mutable default argument would be shared
    # across calls.
    attribute_mapping: Dict[str, str] = dict(custom_attribute_mapping or {})

    # Fallback mapping from primitive attribute types to Faker methods.
    type_mapping = {
        int: "random_int",
        str: "word",
        float: "random_number",
        bool: "boolean",
        date: "date_between",
        datetime: "date_time_between",
    }

    fake = Faker()
    if seed is not None:
        # Seed both Faker and random so list lengths are reproducible too.
        fake.seed_instance(seed)
        random.seed(seed)

    def generate_instance(cls: Type) -> Any:
        """Recursively generate a single populated instance of ``cls``."""
        kwargs = {}
        for attribute in attr.fields(cls):
            name = attribute.name
            attr_type = attribute.type
            # Attribute-name mapping wins; otherwise try a Faker method with
            # the same name as the attribute.
            faker_method_name = attribute_mapping.get(name, name)
            if get_origin(attr_type) is list:
                element_type = get_args(attr_type)[0]
                # List length is drawn uniformly from [list_min, list_max].
                # (The original `range(list_min, randint(...))` produced
                # randint(...) - list_min elements, and primitive lists were
                # hard-coded to 2 elements.)
                length = random.randint(list_min, list_max)
                if attr.has(element_type):
                    # List of nested attrs classes: recurse per element.
                    kwargs[name] = [
                        generate_instance(element_type) for _ in range(length)
                    ]
                elif element_type in type_mapping:
                    # List of primitives: call the mapped Faker method.
                    faker_method = getattr(fake, type_mapping[element_type])
                    kwargs[name] = [faker_method() for _ in range(length)]
                else:
                    kwargs[name] = None
            elif attr.has(attr_type):
                # Nested attrs class: recurse.
                kwargs[name] = generate_instance(attr_type)
            else:
                faker_method = getattr(fake, faker_method_name, None)
                # Fall back to the type-based mapping when no attribute-named
                # Faker method exists.
                if faker_method is None and attr_type in type_mapping:
                    faker_method = getattr(fake, type_mapping[attr_type])
                if callable(faker_method):
                    kwargs[name] = faker_method()
                else:
                    kwargs[name] = None  # No suitable Faker method found
        return cls(**kwargs)

    return [generate_instance(cls) for _ in range(num_records)]
def wrong_type(value: Any) -> Any:
    """
    Return a randomly chosen value whose type differs from ``value``'s type.

    Args:
        value (Any): The original value.

    Returns:
        Any: A value of a mismatched type, or None when ``value``'s type has
        no configured alternatives.
    """
    sample_date = date(1991, 9, 23)
    sample_datetime = datetime(1991, 9, 23, 12, 59, 59)
    # For each supported type, the pool of wrongly-typed replacement values.
    candidates_by_type = {
        int: ["wrong", 3.14, True, sample_date, sample_datetime],
        str: [42, 3.14, False, sample_date, sample_datetime],
        float: ["wrong", 42, False, sample_date, sample_datetime],
        bool: ["wrong", 42, 3.14, sample_date, sample_datetime],
        date: ["wrong", 42, True, 3.14, sample_datetime],
        datetime: ["wrong", 42, True, 3.14, sample_date],
    }
    return random.choice(candidates_by_type.get(type(value), [None]))
def emulate_bad_data(
    instances: List[Any],
    remove_prob: float = 0.1,
    wrong_type_prob: float = 0.15,
    null_prob: float = 0.2,
    null_values: Optional[List] = None,
    seed: Optional[int] = None,
) -> List[Dict]:
    """
    Emulate bad data by randomly removing, corrupting, or nullifying
    attributes of attrs instances.

    Output is reproducible when a ``seed`` is set. Note that the probability
    arguments are *cumulative thresholds* on a single uniform draw ``r`` per
    attribute: the attribute is removed when ``r < remove_prob``, replaced by
    a wrongly-typed value when ``remove_prob <= r < wrong_type_prob``, and
    nulled when ``wrong_type_prob <= r < null_prob``.

    Args:
        instances (List[Any]): attrs instances to be corrupted.
        remove_prob (float, optional): Threshold below which an attribute is
            removed. Defaults to 0.1.
        wrong_type_prob (float, optional): Threshold below which (and above
            ``remove_prob``) an attribute gets a wrong-typed value.
            Defaults to 0.15.
        null_prob (float, optional): Threshold below which (and above
            ``wrong_type_prob``) an attribute is nulled. Defaults to 0.2.
        null_values (Optional[List]): Pool of null markers to choose from.
            Defaults to None, which means ``[None, "NULL", "<null>"]``.
        seed (Optional[int], optional): Seed for reproducibility.
            Defaults to None.

    Returns:
        List[Dict]: One dict per instance with emulated bad data.
    """
    if null_values is None:
        # Fresh list per call; a mutable default would be shared across calls.
        null_values = [None, "NULL", "<null>"]
    if seed is not None:
        random.seed(seed)
    bad_data = []
    for instance in instances:
        instance_dict = attr.asdict(instance)
        # Snapshot the keys, since entries may be deleted during iteration.
        for key in list(instance_dict.keys()):
            rand_val = random.random()
            if rand_val < remove_prob:
                del instance_dict[key]
            elif rand_val < wrong_type_prob:
                instance_dict[key] = wrong_type(instance_dict[key])
            elif rand_val < null_prob:
                instance_dict[key] = random.choice(null_values)
        bad_data.append(instance_dict)
    return bad_data
if __name__ == "__main__":
    # Demo: nested attrs models exercising scalar, nested-class, and list
    # attributes of data_factory.
    @attr.s
    class Hobby:
        tag: str = attr.ib()

    @attr.s
    class Address:
        street: str = attr.ib()
        city: str = attr.ib()

    @attr.s
    class Person:
        name: str = attr.ib()
        age: int = attr.ib()
        birth_date: date = attr.ib()
        address: Address = attr.ib()  # nested attrs class
        hobbies: List[Hobby] = attr.ib()  # list of nested attrs classes
        investments: List[int] = attr.ib()  # list of primitives
        last_login: datetime = attr.ib()

    # Fixed seed so repeated runs print identical data.
    seed = 42
    num_records = 5
    people = data_factory(Person, num_records, seed=seed)
    print()
    print("List People Data:")
    for person in people:
        print(person)
    print()
    # Emulate bad data with custom probabilities and seed
    remove_probability = 0.1
    wrong_type_prob = 0.15
    null_probability = 0.2
    bad_people_data = emulate_bad_data(
        instances=people,
        remove_prob=remove_probability,
        wrong_type_prob=wrong_type_prob,
        null_prob=null_probability,
        null_values=[None, "NULL", "<null>", 0],
        seed=seed,
    )
    # Print bad data
    print()
    print("List People Bad Data:")
    for person in bad_people_data:
        print(person)
# %%
from typing_extensions import Literal
from typing import List, Optional
from dataclasses import dataclass
import numpy as np
import pandas as pd
@dataclass
class SeasonalityRecurrencePeriods:
    """
    A seasonality pattern to be applied to a generated time series.

    Attributes:
        periods (int): Length of one seasonal cycle, in time-series steps.
            For example, with daily data and periods=7 the pattern repeats
            every 7 days.
        recurrence (int, optional): Number of times the pattern occurs within
            ``periods`` steps. Defaults to 1. For example, periods=7 with
            recurrence=2 makes the pattern occur twice per 7-day window.
    """

    periods: int  # length of one seasonal cycle, in steps
    recurrence: int = 1  # occurrences of the pattern within `periods`
def generate_time_series(
    n: int,
    freq: str = "D",
    start_date: str = "2022-01-01",
    seasonality_recurrence_periods: Optional[
        List["SeasonalityRecurrencePeriods"]
    ] = None,
    trend: Optional[Literal["up", "down", "random"]] = None,
    noise_std: float = 0.1,
    seed: Optional[int] = None,
    min_value: Optional[float] = None,
    max_value: Optional[float] = None,
) -> pd.Series:
    """
    Generate synthetic time series data.

    Args:
        n (int): Number of data points.
        freq (str): Frequency of the time series ('D' for daily, 'H' for
            hourly, etc.).
        start_date (str): Start date of the time series.
        seasonality_recurrence_periods (Optional[List[SeasonalityRecurrencePeriods]]):
            List of SeasonalityRecurrencePeriods for multiple seasonality
            patterns.
        trend (Optional[Literal['up', 'down', 'random']]): Type of trend, or
            None for no trend.
        noise_std (float): Standard deviation of the Gaussian noise.
        seed (Optional[int]): Optional random seed for reproducibility.
        min_value (Optional[float]): Optional minimum value for the series.
        max_value (Optional[float]): Optional maximum value for the series.
            Rescaling is applied only when both bounds are given.

    Returns:
        pd.Series: Generated time series data indexed by date.
    """
    # Set seed for consistency in output.
    if seed is not None:
        np.random.seed(seed)
    # Generate the datetime index.
    time_index = pd.date_range(start=start_date, periods=n, freq=freq)
    # Initialize the series.
    ts = np.zeros(n)
    # Superimpose one sine wave per seasonality pattern. Note: use a local
    # name for the recurrence — the original code overwrote the `freq`
    # parameter (the pandas frequency string) inside this loop.
    if seasonality_recurrence_periods:
        for season in seasonality_recurrence_periods:
            omega = 2 * np.pi / season.periods
            ts += np.sin(omega * np.arange(n) * season.recurrence)
    # Add the trend component.
    if trend == "up":
        ts += np.linspace(0, 1, n)
    elif trend == "down":
        ts += np.linspace(1, 0, n)
    elif trend == "random":
        ts += np.random.uniform(-1, 1, n).cumsum()
    # Add Gaussian noise.
    ts += np.random.normal(scale=noise_std, size=n)
    # Linearly rescale to [min_value, max_value] when both bounds are given.
    if min_value is not None and max_value is not None:
        ts_min = np.min(ts)
        ts_max = np.max(ts)
        span = ts_max - ts_min
        if span > 0:
            ts = min_value + (max_value - min_value) * (ts - ts_min) / span
        else:
            # Constant series: avoid division by zero and map to the midpoint.
            ts = np.full(n, (min_value + max_value) / 2.0)
    return pd.Series(ts, index=time_index)
@dataclass
class Adjustment:
    """
    An adjustment to be applied to a time series.

    Attributes:
        adjustment_name (str): The name of the adjustment, used to label its
            column in the resulting DataFrame.
        start_date (str): The start date for the adjustment in 'YYYY-MM-DD'
            format (inclusive).
        end_date (str): The end date for the adjustment in 'YYYY-MM-DD'
            format (inclusive).
        adjustment_type (Literal["percentage", "value"]): The type of
            adjustment to apply.
            - 'percentage': Applies a percentage adjustment to the existing value.
            - 'value': Replaces the existing value with adjustment_value.
        adjustment_value (float): The value of the adjustment.
            - For 'percentage': A positive or negative float representing the
              fractional change (e.g. 0.1 for +10%).
            - For 'value': The value to replace the existing data with.
    """

    adjustment_name: str
    start_date: str
    end_date: str
    adjustment_type: Literal["percentage", "value"]  # 'percentage' or 'value'
    adjustment_value: float
def apply_adjustments(ts: pd.Series, adjustments: list["Adjustment"]) -> pd.DataFrame:
    """
    Apply adjustments to a time series, cumulatively and in load order.

    If ``Adjustment.adjustment_type`` is "percentage", the adjustment is a
    fractional change of the running value (e.g. 0.1 => +10%, -0.1 => -10%).
    If it is "value", the explicit value overrides the running value inside
    the adjustment's date window.

    Args:
        ts (pd.Series): Original time series (date-indexed).
        adjustments (list[Adjustment]): Adjustment objects to apply.

    Returns:
        pd.DataFrame: DataFrame containing the original series, one column
        per adjustment (named by ``adjustment_name``), and the cumulative
        "final" series as the last column.

    Raises:
        ValueError: If an adjustment has an unknown ``adjustment_type``.
    """
    df = pd.DataFrame({"original": ts, "final": ts})
    for adj in adjustments:
        name = adj.adjustment_name
        # Inclusive date window for this adjustment.
        mask = (df.index >= adj.start_date) & (df.index <= adj.end_date)
        if adj.adjustment_type == "percentage":
            # (1 + value) covers both signs: 1 + (-0.1) == 1 - abs(-0.1),
            # so the original sign-based branch was redundant.
            adjusted = df["final"] * (1 + adj.adjustment_value)
        elif adj.adjustment_type == "value":
            adjusted = pd.Series(adj.adjustment_value, index=df.index)
        else:
            raise ValueError(
                f"Unknown adjustment_type: {adj.adjustment_type!r}"
            )
        # Keep the adjusted values only inside the window; elsewhere carry the
        # running "final" series. Direct assignment replaces the deprecated
        # `df[name].where(..., inplace=True)`, which is unreliable under
        # pandas Copy-on-Write.
        df[name] = adjusted.where(mask, df["final"])
        df["final"] = df[name]
    # Reorder columns so 'final' is always the last column.
    column_order = [col for col in df.columns if col != "final"] + ["final"]
    return df[column_order]
# %%
if __name__ == "__main__":
    # Demo: generate ~7 months of daily data with two stacked seasonalities,
    # an upward trend, and noise, then apply date-windowed adjustments.
    n = 210
    freq = "D"
    start_date = "2022-01-01"
    seasonality_recurrence_periods = [
        SeasonalityRecurrencePeriods(periods=30, recurrence=1),  # Monthly seasonality
        SeasonalityRecurrencePeriods(periods=7, recurrence=1),  # Weekly seasonality
    ]
    trend = "up"  # Can be 'up', 'down', or 'random'
    noise_std = 0.3
    seed = 42  # fixed seed for reproducible output
    min_value = 0  # Optional minimum value
    max_value = 1  # Optional maximum value
    time_series_df = generate_time_series(
        n=n,
        freq=freq,
        start_date=start_date,
        seasonality_recurrence_periods=seasonality_recurrence_periods,
        trend=trend,
        noise_std=noise_std,
        seed=seed,
        min_value=min_value,
        max_value=max_value,
    )
    # Plotting requires matplotlib to be installed.
    time_series_df.plot()
    # Adjustments are applied cumulatively in this order; note "Outage" and
    # "Holiday2" share a window, so "Holiday2" scales the outage value.
    adjustments = [
        Adjustment("Holiday", "2022-12-01", "2022-12-31", "percentage", 0.1),
        Adjustment("Outage", "2022-06-01", "2022-06-30", "value", -0.5),
        Adjustment("Holiday2", "2022-06-01", "2022-06-30", "percentage", 0.9),
    ]
    adjusted_time_series_df = apply_adjustments(time_series_df, adjustments)
    adjusted_time_series_df.plot()
    print(adjusted_time_series_df)
# %%
from typing import Any, get_args, get_origin, Type, Dict, Optional, List
import random
from datetime import date, datetime
from pydantic import BaseModel, Field
from faker import Faker
def pydantic_data_factory(
    cls: Type[BaseModel],
    num_records: int,
    custom_attribute_mapping: Optional[Dict[str, str]] = None,
    seed: Optional[int] = None,
    list_min: int = 0,
    list_max: int = 10,
) -> List[BaseModel]:
    """
    Generate ``num_records`` fake instances of a pydantic model using Faker.

    Output is reproducible when a ``seed`` is provided: both Faker and the
    ``random`` module (used for list lengths) are seeded.

    Args:
        cls (Type[BaseModel]): The pydantic model class to instantiate.
        num_records (int): The number of records to generate.
        custom_attribute_mapping (Optional[Dict[str, str]]): Mapping from
            field names to Faker method names; takes precedence over the
            type-based mapping. Defaults to None (empty mapping).
        seed (Optional[int]): Seed for reproducible output. Defaults to None.
        list_min (int): Minimum number of elements generated for list fields.
            Defaults to 0.
        list_max (int): Maximum number of elements generated for list fields.
            Defaults to 10.

    Returns:
        List[BaseModel]: A list of ``num_records`` instances of ``cls``.
    """
    # Copy into a fresh dict: a mutable default argument would be shared
    # across calls.
    attribute_mapping: Dict[str, str] = dict(custom_attribute_mapping or {})

    # Fallback mapping from primitive field types to Faker methods.
    type_mapping = {
        int: "random_int",
        str: "word",
        float: "random_number",
        bool: "boolean",
        date: "date_between",
        datetime: "date_time_between",
    }

    fake = Faker()
    if seed is not None:
        # Seed both Faker and random so list lengths are reproducible too.
        fake.seed_instance(seed)
        random.seed(seed)

    def generate_instance(cls: Type[BaseModel]) -> BaseModel:
        """Recursively generate a single populated instance of ``cls``."""
        kwargs = {}
        for name, field in cls.model_fields.items():
            attr_type = field.annotation
            # Field-name mapping wins; otherwise try a same-named Faker method.
            faker_method_name = attribute_mapping.get(name, name)
            if get_origin(attr_type) is list:
                element_type = get_args(attr_type)[0]
                # List length is drawn uniformly from [list_min, list_max].
                # (The original produced randint(...) - list_min elements for
                # model lists and a hard-coded 2 for primitive lists.)
                length = random.randint(list_min, list_max)
                # Guard issubclass: element_type may be a typing construct
                # (e.g. Optional[int]) rather than a class, which would raise.
                if isinstance(element_type, type) and issubclass(
                    element_type, BaseModel
                ):
                    kwargs[name] = [
                        generate_instance(element_type) for _ in range(length)
                    ]
                elif element_type in type_mapping:
                    faker_method = getattr(fake, type_mapping[element_type])
                    kwargs[name] = [faker_method() for _ in range(length)]
                else:
                    kwargs[name] = None
            elif isinstance(attr_type, type) and issubclass(attr_type, BaseModel):
                # Nested pydantic model: recurse.
                kwargs[name] = generate_instance(attr_type)
            else:
                faker_method = getattr(fake, faker_method_name, None)
                # Fall back to the type-based mapping when no field-named
                # Faker method exists.
                if faker_method is None and attr_type in type_mapping:
                    faker_method = getattr(fake, type_mapping[attr_type])
                if callable(faker_method):
                    kwargs[name] = faker_method()
                else:
                    kwargs[name] = None  # No suitable Faker method found
        return cls(**kwargs)

    return [generate_instance(cls) for _ in range(num_records)]
def pydantic_wrong_type(value: Any) -> Any:
    """
    Return a randomly selected value whose type does not match ``value``'s.

    Args:
        value (Any): The original value.

    Returns:
        Any: A wrongly-typed value, or None when ``value``'s type has no
        configured alternatives.
    """
    bad_date = date(1991, 9, 23)
    bad_datetime = datetime(1991, 9, 23, 12, 59, 59)
    # Per supported type, the pool of wrongly-typed replacement values.
    alternatives = {
        int: ["wrong", 3.14, True, bad_date, bad_datetime],
        str: [42, 3.14, False, bad_date, bad_datetime],
        float: ["wrong", 42, False, bad_date, bad_datetime],
        bool: ["wrong", 42, 3.14, bad_date, bad_datetime],
        date: ["wrong", 42, True, 3.14, bad_datetime],
        datetime: ["wrong", 42, True, 3.14, bad_date],
    }
    return random.choice(alternatives.get(type(value), [None]))
def pydantic_emulate_bad_data(
    instances: List["BaseModel"],
    remove_prob: float = 0.1,
    wrong_type_prob: float = 0.15,
    null_prob: float = 0.2,
    null_values: Optional[List] = None,
    seed: Optional[int] = None,
) -> List[Dict]:
    """
    Emulate bad data by randomly removing, corrupting, or nullifying fields
    of pydantic model instances.

    Output is reproducible when a ``seed`` is set. Note that the probability
    arguments are *cumulative thresholds* on a single uniform draw ``r`` per
    field: the field is removed when ``r < remove_prob``, replaced by a
    wrongly-typed value when ``remove_prob <= r < wrong_type_prob``, and
    nulled when ``wrong_type_prob <= r < null_prob``.

    Args:
        instances (List[BaseModel]): Pydantic model instances to corrupt.
        remove_prob (float, optional): Threshold below which a field is
            removed. Defaults to 0.1.
        wrong_type_prob (float, optional): Threshold below which (and above
            ``remove_prob``) a field gets a wrong-typed value.
            Defaults to 0.15.
        null_prob (float, optional): Threshold below which (and above
            ``wrong_type_prob``) a field is nulled. Defaults to 0.2.
        null_values (Optional[List]): Pool of null markers to choose from.
            Defaults to None, which means ``[None, "NULL", "<null>"]``.
        seed (Optional[int], optional): Seed for reproducibility.
            Defaults to None.

    Returns:
        List[Dict]: One dict per instance with emulated bad data.
    """
    if null_values is None:
        # Fresh list per call; a mutable default would be shared across calls.
        null_values = [None, "NULL", "<null>"]
    if seed is not None:
        random.seed(seed)
    bad_data = []
    for instance in instances:
        # model_dump() is the pydantic v2 API, consistent with model_fields
        # used elsewhere in this file; .dict() is deprecated in v2.
        instance_dict = instance.model_dump()
        # Snapshot the keys, since entries may be deleted during iteration.
        for key in list(instance_dict.keys()):
            rand_val = random.random()
            if rand_val < remove_prob:
                del instance_dict[key]
            elif rand_val < wrong_type_prob:
                instance_dict[key] = pydantic_wrong_type(instance_dict[key])
            elif rand_val < null_prob:
                instance_dict[key] = random.choice(null_values)
        bad_data.append(instance_dict)
    return bad_data
if __name__ == "__main__":
    # Demo: nested pydantic models exercising scalar, nested-model, and list
    # fields of pydantic_data_factory.
    class Hobby(BaseModel):
        tag: str

    class Address(BaseModel):
        street: str
        city: str

    class Person(BaseModel):
        name: str
        age: int
        birth_date: date
        address: Address  # nested pydantic model
        hobbies: List[Hobby]  # list of nested pydantic models
        investments: List[int]  # list of primitives
        last_login: datetime

    # Fixed seed so repeated runs print identical data.
    seed = 42
    num_records = 5
    people = pydantic_data_factory(Person, num_records, seed=seed)
    print()
    print("List People Data:")
    for person in people:
        print(person)
    print()
    # Emulate bad data with custom probabilities and the same seed.
    remove_probability = 0.1
    wrong_type_prob = 0.15
    null_probability = 0.2
    bad_people_data = pydantic_emulate_bad_data(
        instances=people,
        remove_prob=remove_probability,
        wrong_type_prob=wrong_type_prob,
        null_prob=null_probability,
        null_values=[None, "NULL", "<null>", 0],
        seed=seed,
    )
    print()
    print("List People Bad Data:")
    for person in bad_people_data:
        print(person)
attrs
Faker
matplotlib
pandas
pydantic
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment