# prrao87/base_utils.py

## base_utils.py
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score


class Base:
    """Common utilities for reading labelled test data and scoring predictions.

    ``read_data`` loads a headerless, tab-separated file into a DataFrame and
    converts the ``truth`` column to an integer-valued categorical.
    ``accuracy`` prints the accuracy (percentage) and macro F1 score for a
    DataFrame that holds ``truth`` and ``pred`` columns.
    """

    def __init__(self) -> None:
        pass

    def read_data(self, fname: str, lower_case: bool = False,
                  colnames=None) -> pd.DataFrame:
        """Read test data into a pandas DataFrame.

        Args:
            fname: Path of the headerless, tab-separated file to read.
            lower_case: If True, lowercase the ``text`` column (useful when
                the model was trained on lowercased text).
            colnames: Column names to assign to the file's columns. Defaults
                to ``['truth', 'text']``. The ``truth`` column (and ``text``
                when ``lower_case`` is set) is accessed by name, so custom
                names must still include them.

        Returns:
            The loaded DataFrame, with ``truth`` stripped of any
            ``__label__`` prefix and stored as an integer-valued categorical.

        Raises:
            KeyError: If ``colnames`` does not contain a required column.
            ValueError: If a label cannot be converted to an integer.
        """
        if colnames is None:
            # Build the default per call rather than sharing one mutable
            # list object across every invocation.
            colnames = ['truth', 'text']
        df = pd.read_csv(fname, sep='\t', header=None, names=colnames)
        # Cast to str first so files whose labels are already bare integers
        # (parsed as a numeric column) do not break the .str accessor.
        # regex=False pins the literal match regardless of the pandas
        # version's default for ``regex``.
        labels = df['truth'].astype(str).str.replace(
            '__label__', '', regex=False)
        # Categorical data type for truth labels
        df['truth'] = labels.astype(int).astype('category')
        # Optional lowercase for test data (if model was trained on lowercased text)
        if lower_case:
            df['text'] = df['text'].str.lower()
        return df

    def accuracy(self, df: pd.DataFrame) -> None:
        """Print prediction accuracy (percentage) and macro F1 score.

        Args:
            df: DataFrame with a ``truth`` column of reference labels and a
                ``pred`` column of predicted labels.
        """
        truth, pred = df['truth'], df['pred']
        acc = accuracy_score(truth, pred) * 100
        f1 = f1_score(truth, pred, average='macro')
        print("Accuracy: {}\nMacro F1-score: {}".format(acc, f1))