# selflein/format_df.py

## format_df.py
import pandas as pd


def format_best(to_bold, numeric_df=None, order="max", format_str="\\bfseries{{{}}}"):
    """Wrap the best cell(s) of each column of a pandas DataFrame in a format string.

    For every column of ``to_bold`` the target value is computed from the
    column with the same label in ``numeric_df`` (cast to float). Every cell of
    ``to_bold`` whose row label holds that value is replaced by
    ``format_str.format(cell)``; tied rows are all formatted.

    Args:
        to_bold: pandas.DataFrame to operate on in-place. A column that is not
            already string-like is converted to ``object`` dtype before a
            formatted string is written into it.
        numeric_df: Optional pandas.DataFrame from which the best rows should be
            inferred. Defaults to ``to_bold``. It is indexed with the column
            labels of ``to_bold`` and its row labels are used to address
            ``to_bold``.
        order: Defines which row should be formatted. Supported: `max`, `min`,
            `second_highest`, `second_lowest`. A string applies to all columns;
            any other iterable is interpreted per column and must contain one
            entry per column. `second_highest` is the smaller of the two
            largest entries (`second_lowest` the larger of the two smallest),
            so duplicated extremes count as separate entries.
        format_str: Formatting that should be applied to the selected cells.

    Raises:
        ValueError: If an ordering is not supported, or if a per-column
            ``order`` does not have exactly one entry per column.
    """
    if numeric_df is None:
        numeric_df = to_bold

    n_columns = len(to_bold.columns)
    if isinstance(order, str):
        order = [order] * n_columns
    else:
        order = list(order)
        # zip() below would silently drop the surplus columns or orderings.
        if len(order) != n_columns:
            raise ValueError(
                "Expected one ordering per column ({}), got {}.".format(
                    n_columns, len(order)
                )
            )

    # Validate before touching the frame so that a bad entry cannot leave
    # ``to_bold`` partially formatted.
    supported = ("max", "min", "second_highest", "second_lowest")
    for col_order in order:
        if col_order not in supported:
            raise ValueError("Ordering not supported.")

    for column, col_order in zip(to_bold.columns, order):
        float_series = numeric_df[column].astype(float)
        if col_order == "max":
            target = float_series.max()
        elif col_order == "min":
            target = float_series.min()
        elif col_order == "second_highest":
            target = float_series.nlargest(2).min()
        else:  # "second_lowest"
            target = float_series.nsmallest(2).max()

        is_best = float_series == target
        best_labels = is_best[is_best].index
        if best_labels.empty:
            # Nothing compares equal to the target (e.g. empty or all-NaN column).
            continue

        # Writing a str into a numeric column relies on implicit upcasting,
        # which pandas deprecated; make the column able to hold strings first.
        if not pd.api.types.is_string_dtype(to_bold[column].dtype):
            to_bold[column] = to_bold[column].astype(object)

        for label in best_labels:
            to_bold.loc[label, column] = format_str.format(to_bold.loc[label, column])


if __name__ == "__main__":
    # Small demo: one row per method after transposing, then mark the
    # largest entry of every column and print the resulting table.
    example_df = pd.DataFrame(
        {"Method 1": [0.5, 0.9], "Method 2": [0.3, 1.0]}
    ).T
    format_best(example_df)
    print(example_df.to_string())
	import pandas as pd


	def format_best(to_bold, numeric_df=None, order="max", format_str="\\bfseries{{{}}}"):
		"""Wrap the best cell(s) of each column of a pandas DataFrame in a format string.

		For every column of ``to_bold`` the target value is computed from the
		column with the same label in ``numeric_df`` (cast to float). Every cell of
		``to_bold`` whose row label holds that value is replaced by
		``format_str.format(cell)``; tied rows are all formatted.

		Args:
			to_bold: pandas.DataFrame to operate on in-place. A column that is not
				already string-like is converted to ``object`` dtype before a
				formatted string is written into it.
			numeric_df: Optional pandas.DataFrame from which the best rows should be
				inferred. Defaults to ``to_bold``. It is indexed with the column
				labels of ``to_bold`` and its row labels are used to address
				``to_bold``.
			order: Defines which row should be formatted. Supported: `max`, `min`,
				`second_highest`, `second_lowest`. A string applies to all columns;
				any other iterable is interpreted per column and must contain one
				entry per column. `second_highest` is the smaller of the two
				largest entries (`second_lowest` the larger of the two smallest),
				so duplicated extremes count as separate entries.
			format_str: Formatting that should be applied to the selected cells.

		Raises:
			ValueError: If an ordering is not supported, or if a per-column
				``order`` does not have exactly one entry per column.
		"""
		if numeric_df is None:
			numeric_df = to_bold

		n_columns = len(to_bold.columns)
		if isinstance(order, str):
			order = [order] * n_columns
		else:
			order = list(order)
			# zip() below would silently drop the surplus columns or orderings.
			if len(order) != n_columns:
				raise ValueError(
					"Expected one ordering per column ({}), got {}.".format(
						n_columns, len(order)
					)
				)

		# Validate before touching the frame so that a bad entry cannot leave
		# ``to_bold`` partially formatted.
		supported = ("max", "min", "second_highest", "second_lowest")
		for col_order in order:
			if col_order not in supported:
				raise ValueError("Ordering not supported.")

		for column, col_order in zip(to_bold.columns, order):
			float_series = numeric_df[column].astype(float)
			if col_order == "max":
				target = float_series.max()
			elif col_order == "min":
				target = float_series.min()
			elif col_order == "second_highest":
				target = float_series.nlargest(2).min()
			else:  # "second_lowest"
				target = float_series.nsmallest(2).max()

			is_best = float_series == target
			best_labels = is_best[is_best].index
			if best_labels.empty:
				# Nothing compares equal to the target (e.g. empty or all-NaN column).
				continue

			# Writing a str into a numeric column relies on implicit upcasting,
			# which pandas deprecated; make the column able to hold strings first.
			if not pd.api.types.is_string_dtype(to_bold[column].dtype):
				to_bold[column] = to_bold[column].astype(object)

			for label in best_labels:
				to_bold.loc[label, column] = format_str.format(to_bold.loc[label, column])


	if __name__ == "__main__":
		# Small demo: build a table with one row per method (after transposing),
		# mark the largest entry of every column, and print the result.
		data = {
			"Method 1": [0.5, 0.9],
			"Method 2": [0.3, 1.0],
		}
		example_df = pd.DataFrame(data).T
		format_best(example_df)
		print(example_df.to_string())