Nick Sorros nsorros

## load_data.py
import os

import argilla as rg


# Point the Argilla client at the hosted instance. The API key is read from
# the ARGILLA_API_KEY environment variable so it never appears in the source;
# os.environ.get yields None when the variable is unset.
rg.init(
    api_url="https://pro.argilla.io",
    api_key=os.environ.get("ARGILLA_API_KEY"),
    workspace="mantisnlp",
    #extra_headers={"X-Argilla-Workspace": "my_connection_headers"}
)

## convert_to_csc.py
# Load the matrices stored at y_pred_path and y_test_path and convert both to
# CSC (compressed sparse column) format.
# NOTE(review): presumably chosen because the threshold search slices one
# column per label (e.g. `Y[:, k]`), which CSC serves efficiently — confirm.
Y_pred_proba = load_npz(y_pred_path).tocsc()
Y_test = load_npz(y_test_path).tocsc()

## speed_mlcm_calculation.py
# Adopt the candidate thresholds only when they score strictly higher than the
# current ones under `fp`.
# NOTE(review): `fp`, `k`, `y_pred_proba`, `y_test` and `mlcm` are not defined
# in this excerpt — presumably this sits inside the per-label loop; confirm.
if fp(optimal_thresholds_star) > fp(optimal_thresholds):
    optimal_thresholds = optimal_thresholds_star
    # Re-binarise label k with its new threshold and overwrite only that
    # label's slice of mlcm rather than recomputing every label.
    y_pred = y_pred_proba > optimal_thresholds[k]
    cmk = confusion_matrix(y_test, y_pred)
    mlcm[k,:,:] = cmk
    # Record that at least one threshold changed.
    updated = True

## select_columns_outside_argmaxf1.py
# One pass over the N labels; `updated` is reset before the pass starts.
updated = False
for k in range(N):
    # Timestamp for this label; it is not read anywhere in this excerpt.
    start = time.time()

    # Densify column k of each matrix once, here, as flat 1-D arrays that are
    # then handed to both `f` and `argmaxf1`.
    y_pred_proba = np.array(Y_pred_proba[:,k].todense()).ravel()
    y_test = np.array(Y_test[:,k].todense()).ravel()
    # Bind everything except the thresholds, so `fp(thresholds)` scores one
    # candidate. NOTE(review): this passes 1-D columns as the first two
    # arguments of `f` — confirm the `f` in use expects columns, not matrices.
    fp = partial(f, y_pred_proba, y_test, mlcm, k)

    # Ask argmaxf1 (not shown in this excerpt) for candidate thresholds.
    optimal_thresholds_star = argmaxf1(y_pred_proba, y_test, optimal_thresholds, mlcm, k, nb_thresholds)

## custom_confusion_matrix.py
def confusion_matrix(y_test, y_pred):
    """Build the 2x2 confusion matrix for a single label.

    Both arguments are combined with ``dot`` and ``sum``, so they are used
    as 1-D vectors of equal length. For 0/1 (or boolean) vectors the dot
    product counts the positions where both are set.

    Returns:
        A NumPy array laid out as ``[[tn, fp], [fn, tp]]``.
    """
    true_pos = y_test.dot(y_pred)
    # Everything predicted positive that is not a true positive.
    false_pos = y_pred.sum() - true_pos
    # Everything actually positive that was missed.
    false_neg = y_test.sum() - true_pos
    # Whatever remains of the samples is a true negative.
    true_neg = y_test.shape[0] - true_pos - false_pos - false_neg
    top_row = [true_neg, false_pos]
    bottom_row = [false_neg, true_pos]
    return np.array([top_row, bottom_row])

def f(Y_pred_proba, Y_test, mlcm, k, thresholds):
    """Excerpt of the per-label variant of `f`; the remainder is not shown.

    NOTE(review): only the column extraction is visible here. `mlcm` and
    `thresholds` are not used in the visible lines and nothing is returned —
    presumably the scoring logic follows in the full script; confirm.
    """
    # Pull column k out of each matrix (via .todense()) as a flat 1-D array.
    y_pred_proba = np.array(Y_pred_proba[:,k].todense()).ravel()
    y_test = np.array(Y_test[:,k].todense()).ravel()

## custom_f1_score.py
def f(Y_pred_proba, Y_test, thresholds):
    """Return the micro-averaged F1 score of thresholded predictions.

    Predictions are binarised with ``Y_pred_proba > thresholds``, the
    per-label confusion matrices are summed into a single 2x2 matrix, and
    F1 is computed from it as ``tp / (tp + (fp + fn) / 2)``.

    Returns:
        The F1 score, or ``0.0`` when there are no true positives, false
        positives or false negatives at all. Without this guard the 0/0
        division produces NaN, and NaN compares False against every score,
        which stalls a "keep the higher score" threshold search.
    """
    Y_pred = Y_pred_proba > thresholds
    mlcm = multilabel_confusion_matrix(Y_test, Y_pred)
    # Pool all labels into one matrix before scoring (micro-averaging).
    cm = mlcm.sum(axis=0)
    tn, fp, fn, tp = cm.ravel()
    denominator = tp + (fp + fn) / 2
    if denominator == 0:
        return 0.0
    return tp / denominator

## custom_multilabel_confusion_matrix.py
def multilabel_confusion_matrix(Y_test, Y_pred):
    """Compute one 2x2 confusion matrix per label column.

    Counts are taken column-wise (``axis=0``) from the element-wise product
    and the column sums of the two inputs. ``Y_test`` must provide a
    ``multiply`` method (presumably a SciPy sparse matrix — confirm against
    the caller).

    Returns:
        A NumPy array reshaped to ``(-1, 2, 2)`` where each 2x2 slice is
        ``[[tn, fp], [fn, tp]]`` for one column.
    """
    true_pos = Y_test.multiply(Y_pred).sum(axis=0)
    false_pos = Y_pred.sum(axis=0) - true_pos
    false_neg = Y_test.sum(axis=0) - true_pos
    true_neg = Y_test.shape[0] - true_pos - false_pos - false_neg
    # Stack the four statistics, then transpose so each label's four counts
    # are contiguous before folding them into 2x2 matrices.
    stacked = np.array([true_neg, false_pos, false_neg, true_pos])
    per_label = stacked.T
    return per_label.reshape(-1, 2, 2)

## optimize_threshold_custom_f1.py
from functools import partial
import time

from sklearn.metrics import multilabel_confusion_matrix
from scipy.sparse import load_npz
import numpy as np
import typer

if "line_profiler" not in dir() and "profile" not in dir():
    # no-op profile decorator

## optimise_threshold_naive.py
from functools import partial
import time

from sklearn.metrics import f1_score
from scipy.sparse import load_npz
import numpy as np
import typer


def f(Y_pred_proba, Y_test, thresholds):

## train_config.py
import configparser
import argparse

def train(data_path, model_path, learning_rate, batch_size):
    """Train a model (placeholder — the body is elided in this excerpt).

    NOTE(review): the parameter names suggest an input data location, an
    output model location and two hyper-parameters, presumably supplied from
    the file passed via ``--config`` — confirm against the full script.
    """
    ...

# Script entry point: accept the path of a configuration file via --config.
# NOTE(review): the parsed `args` are not used in the visible lines — the code
# that reads the config and calls train() is presumably cut off; confirm.
if __name__ == "__main__":
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--config", type=str, help="path to config file")
    args = argparser.parse_args()
	import os

	import argilla as rg


	# Point the Argilla client at the hosted instance. The API key is read
	# from the ARGILLA_API_KEY environment variable; os.environ.get yields
	# None when the variable is unset.
	rg.init(
		api_url="https://pro.argilla.io",
		api_key=os.environ.get("ARGILLA_API_KEY"),
		workspace="mantisnlp",
		#extra_headers={"X-Argilla-Workspace": "my_connection_headers"}
	)
	# Load the matrices stored at y_pred_path and y_test_path and convert both
	# to CSC (compressed sparse column) format.
	Y_pred_proba = load_npz(y_pred_path).tocsc()
	Y_test = load_npz(y_test_path).tocsc()
	# Adopt the candidate thresholds only when they score strictly higher than
	# the current ones under `fp`.
	if fp(optimal_thresholds_star) > fp(optimal_thresholds):
		optimal_thresholds = optimal_thresholds_star
		# Re-binarise label k with its new threshold and overwrite only that
		# label's slice of mlcm rather than recomputing every label.
		y_pred = y_pred_proba > optimal_thresholds[k]
		cmk = confusion_matrix(y_test, y_pred)
		mlcm[k,:,:] = cmk
		# Record that at least one threshold changed.
		updated = True
	# One pass over the N labels; `updated` is reset before the pass starts.
	updated = False
	for k in range(N):
		# Timestamp for this label; it is not read anywhere in this excerpt.
		start = time.time()

		# Densify column k of each matrix once, as flat 1-D arrays.
		y_pred_proba = np.array(Y_pred_proba[:,k].todense()).ravel()
		y_test = np.array(Y_test[:,k].todense()).ravel()
		# Bind everything except the thresholds, so `fp(thresholds)` scores
		# one candidate.
		fp = partial(f, y_pred_proba, y_test, mlcm, k)

		# Ask argmaxf1 (not shown in this excerpt) for candidate thresholds.
		optimal_thresholds_star = argmaxf1(y_pred_proba, y_test, optimal_thresholds, mlcm, k, nb_thresholds)
	def confusion_matrix(y_test, y_pred):
		"""Build the 2x2 confusion matrix for a single label.

		Both arguments are combined with ``dot`` and ``sum``, so they are
		used as 1-D vectors of equal length. The result is laid out as
		``[[tn, fp], [fn, tp]]``.
		"""
		tp = y_test.dot(y_pred)
		fp = y_pred.sum() - tp
		fn = y_test.sum() - tp
		# Whatever remains of the samples is a true negative.
		tn = y_test.shape[0] - tp - fp - fn
		return np.array([[tn, fp], [fn, tp]])

	def f(Y_pred_proba, Y_test, mlcm, k, thresholds):
		"""Excerpt of the per-label variant of `f`; the remainder is not shown.

		NOTE(review): only the column extraction is visible. `mlcm` and
		`thresholds` are not used in the visible lines and nothing is
		returned — presumably the scoring logic follows; confirm.
		"""
		# Pull column k out of each matrix (via .todense()) as a flat 1-D array.
		y_pred_proba = np.array(Y_pred_proba[:,k].todense()).ravel()
		y_test = np.array(Y_test[:,k].todense()).ravel()
	def f(Y_pred_proba, Y_test, thresholds):
		"""Return the micro-averaged F1 score of thresholded predictions.

		Predictions are binarised with ``Y_pred_proba > thresholds``, the
		per-label confusion matrices are summed into a single 2x2 matrix,
		and F1 is computed from it as ``tp / (tp + (fp + fn) / 2)``.

		Returns 0.0 when there are no true positives, false positives or
		false negatives at all, instead of the NaN that 0/0 would produce;
		NaN compares False against every score.
		"""
		Y_pred = Y_pred_proba > thresholds
		mlcm = multilabel_confusion_matrix(Y_test, Y_pred)
		# Pool all labels into one matrix before scoring (micro-averaging).
		cm = mlcm.sum(axis=0)
		tn, fp, fn, tp = cm.ravel()
		denominator = tp + (fp + fn) / 2
		if denominator == 0:
			return 0.0
		return tp / denominator
	def multilabel_confusion_matrix(Y_test, Y_pred):
		"""Compute one 2x2 confusion matrix per label column.

		Counts are taken column-wise (``axis=0``). ``Y_test`` must provide a
		``multiply`` method (presumably a SciPy sparse matrix — confirm).
		The result is reshaped to ``(-1, 2, 2)``; each 2x2 slice is
		``[[tn, fp], [fn, tp]]`` for one column.
		"""
		tp = Y_test.multiply(Y_pred).sum(axis=0)
		fp = Y_pred.sum(axis=0) - tp
		fn = Y_test.sum(axis=0) - tp
		tn = Y_test.shape[0] - tp - fp - fn
		# Transpose so each label's four counts are contiguous before folding
		# them into 2x2 matrices.
		return np.array([tn, fp, fn, tp]).T.reshape(-1, 2, 2)
	from functools import partial
	import time

	from sklearn.metrics import multilabel_confusion_matrix
	from scipy.sparse import load_npz
	import numpy as np
	import typer

	if "line_profiler" not in dir() and "profile" not in dir():
	# no-op profile decorator
	from functools import partial
	import time

	from sklearn.metrics import f1_score
	from scipy.sparse import load_npz
	import numpy as np
	import typer


	def f(Y_pred_proba, Y_test, thresholds):
	import configparser
	import argparse

	def train(data_path, model_path, learning_rate, batch_size):
		"""Train a model (placeholder — the body is elided in this excerpt).

		NOTE(review): the parameters are presumably supplied from the file
		passed via ``--config`` — confirm against the full script.
		"""
		...

	# Script entry point: accept the path of a configuration file via --config.
	# NOTE(review): the parsed `args` are not used in the visible lines — the
	# code that reads the config is presumably cut off; confirm.
	if __name__ == "__main__":
		argparser = argparse.ArgumentParser()
		argparser.add_argument("--config", type=str, help="path to config file")
		args = argparser.parse_args()