Last active
July 17, 2020 18:25
-
-
Save honno/6693d7d9182f71d99d1424a1d5a67f34 to your computer and use it in GitHub Desktop.
profiling concept for rngtest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from rngtest.profiling import profile, multi_profile | |
def get_columns(df): | |
for col in df: | |
yield df[col] | |
@profile | |
def concat_by_column(df): | |
cols = get_columns(df) | |
series = pd.concat(cols) | |
return series | |
@profile | |
def concat_by_row(df): | |
df = df.transpose() | |
cols = get_columns(df) | |
series = pd.concat(cols) | |
return series | |
@multi_profile | |
def by_column(df): | |
multiple_series = get_columns(df) | |
return multiple_series |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Methods to enable profiling functionality | |
A profile is a particular view of data. Data is the DataFrame made by reading a | |
text file that contains RNG outputs. Profiles are Series that are made from the | |
data, i.e. sequences of values that are actually usable in randomness tests. | |
""" | |
import inspect | |
from functools import wraps | |
from importlib.util import module_from_spec | |
from importlib.util import spec_from_file_location | |
from typing import Iterator | |
from typing import Tuple | |
import pandas as pd | |
# These attributes are set in the decorated functions objects | |
# They are used to identify profiles when loading a user-inputted script | |
PROFILE_KEY = "is_profile" | |
MULTI_PROFILE_KEY = "is_multi_profile" | |
def profile(func): | |
"""Converts methods that return a Series into profiles""" | |
@wraps(func) | |
def wrapper(df: pd.DataFrame) -> Tuple[str, pd.Series]: | |
name = func.__name__ | |
series = func(df) | |
series = series.rename(name) | |
return name, series | |
setattr(wrapper, PROFILE_KEY, True) | |
return wrapper | |
def multi_profile(func): | |
"""Converts methods that return multiple Series into profiles""" | |
@wraps(func) | |
def wrapper(df: pd.DataFrame) -> Iterator[Tuple[str, pd.Series]]: | |
name = func.__name__ | |
multiple_series = func(df) | |
for i, series in enumerate(multiple_series): | |
profile_name = name + f"_{i}" | |
series = series.rename(profile_name) | |
yield profile_name, series | |
setattr(wrapper, MULTI_PROFILE_KEY, True) | |
return wrapper | |
def get_profile_functions(functions): | |
"""Finds what methods are profiles""" | |
for func in functions: | |
try: | |
if getattr(func, PROFILE_KEY): | |
yield func | |
except AttributeError: | |
pass | |
def get_multi_profile_functions(functions): | |
"""Finds what methods are multi-profiles""" | |
for func in functions: | |
try: | |
if getattr(func, MULTI_PROFILE_KEY): | |
yield func | |
except AttributeError: | |
pass | |
def profiled_data(df, profiles_path) -> Iterator[Tuple[str, pd.Series]]: | |
"""Returns all profiles in a user-inputted profile script""" | |
spec = spec_from_file_location("..profiles", profiles_path) | |
module = module_from_spec(spec) | |
spec.loader.exec_module(module) | |
# members is a list of (function_name, function_obj) tuples | |
members = inspect.getmembers(module, inspect.isfunction) | |
functions = [member[1] for member in members] | |
profiles = get_profile_functions(functions) | |
for profile in profiles: | |
name, series = profile(df) | |
yield name, series | |
multi_profiles = get_multi_profile_functions(functions) | |
for func in multi_profiles: | |
profiles = func(df) | |
for name, series in profiles: | |
yield name, series |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment