def f1_loss(y_true: torch.Tensor, y_pred: torch.Tensor, is_training=False) -> torch.Tensor:
    '''Calculate the binary F1 score. Can work with gpu tensors.

    The original implementation is written by Michal Haltuf on Kaggle.

    Parameters
    ----------
    y_true : torch.Tensor
        1-D tensor of ground-truth labels (0/1, or bool).
    y_pred : torch.Tensor
        Either a 1-D tensor of predictions, or a 2-D tensor of per-class
        scores, in which case the argmax over dim 1 is used as the
        predicted label.
    is_training : bool
        Whether the returned tensor should have ``requires_grad`` set.

    Returns
    -------
    torch.Tensor
        0-dim (scalar) float32 tensor. 0 <= val <= 1

    Reference
    ---------
    - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric
    - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
    - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6
    '''
    assert y_true.ndim == 1
    assert y_pred.ndim == 1 or y_pred.ndim == 2

    if y_pred.ndim == 2:
        y_pred = y_pred.argmax(dim=1)

    # Work in float32 from the start so that bool tensors are accepted too
    # (``1 - bool_tensor`` is rejected by torch).
    y_true = y_true.to(torch.float32)
    y_pred = y_pred.to(torch.float32)

    # True negatives are not part of the F1 formula, so they are not computed.
    tp = (y_true * y_pred).sum()
    fp = ((1 - y_true) * y_pred).sum()
    fn = (y_true * (1 - y_pred)).sum()

    # epsilon keeps every division defined when a count is zero.
    epsilon = 1e-7

    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)

    f1 = 2 * (precision * recall) / (precision + recall + epsilon)

    if not is_training:
        # Assigning ``requires_grad = False`` on a non-leaf tensor raises a
        # RuntimeError; detaching yields the same "no gradient" result for
        # both leaf and non-leaf tensors.
        return f1.detach()
    if not f1.requires_grad:
        # Here f1 has no autograd history (e.g. after argmax), so it is a
        # leaf and the flag may be set directly.
        f1.requires_grad_(True)
    return f1
class F1_Loss(nn.Module):
    '''Differentiable F1 loss: ``1 - mean(per-class soft F1)``. Can work with gpu tensors.

    The logits are turned into probabilities with softmax and compared with
    the one-hot encoded targets, so true/false positives are "soft" counts
    and the result is differentiable with respect to the logits.

    The original implementation is written by Michal Haltuf on Kaggle.

    Parameters
    ----------
    epsilon : float
        Small constant added to denominators and used to clamp the
        per-class F1 into ``[epsilon, 1 - epsilon]``.

    Returns
    -------
    torch.Tensor
        0-dim (scalar) loss returned by ``forward``.
        epsilon <= val <= 1 - epsilon

    Reference
    ---------
    - https://www.kaggle.com/rejpalcz/best-loss-function-for-f1-score-metric
    - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score
    - https://discuss.pytorch.org/t/calculating-precision-recall-and-f1-score-in-case-of-multi-label-classification/28265/6
    - http://www.ryanzhang.info/python/writing-your-own-loss-function-module-for-pytorch/
    '''

    def __init__(self, epsilon=1e-7):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, y_pred, y_true,):
        '''Return ``1 - mean F1`` for logits ``y_pred`` (2-D) and integer class labels ``y_true`` (1-D).'''
        assert y_pred.ndim == 2
        assert y_true.ndim == 1

        # Take the class count from the logits instead of hard-coding 2, so
        # the same loss also works for more than two classes.
        num_classes = y_pred.shape[1]
        y_true = F.one_hot(y_true, num_classes).to(torch.float32)
        y_pred = F.softmax(y_pred, dim=1)

        # Per-class soft counts (summed over the batch dimension). True
        # negatives are not part of the F1 formula, so they are not computed.
        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
        return 1 - f1.mean()
# Instantiate the loss module with its default epsilon and move it to the GPU
# via .cuda().
# NOTE(review): this rebinds the module-level name `f1_loss`, so the `f1_loss`
# function defined earlier in this file is no longer reachable under that name
# once this line has run — confirm that the shadowing is intended.
f1_loss = F1_Loss().cuda()
Last active
August 26, 2020 08:28
-
-
Save nlpjoe/50c16a210a5898d2589ac341274db4bb to your computer and use it in GitHub Desktop.
[AUC、cos评价指标、f1 score] #pytorch #ml
Macro F1: 将n分类的评价拆成n个二分类的评价,计算每个二分类的F1 score,n个F1 score的平均值即为Macro F1。
Micro F1: 将n分类的评价拆成n个二分类的评价,将n个二分类评价的TP、FP、FN对应相加,计算整体的精确率和召回率,由这2个指标(精确率和召回率)计算的F1 score即为Micro F1。
一般来讲,Macro F1、Micro F1 高的分类效果好。Macro F1受样本数量少的类别影响大。
基本元素:
(1)若一个实例是正类,并且被预测为正类,即为真正类TP(True Positive ) (2)若一个实例是正类,但是被预测为负类,即为假负类FN(False Negative ) (3)若一个实例是负类,但是被预测为正类,即为假正类FP(False Positive ) (4)若一个实例是负类,并且被预测为负类,即为真负类TN(True Negative )
真正率:TP/(TP+FN)(TPRate:所有label=1的数据,预测为1的比例) 假正率:FP/(FP+TN)(FPRate:所有label=0的数据,预测为1的比例)
Roc曲线,横轴是FPRate,纵轴是TPRate。
我们希望分类器达到的效果是:对于真实类别为1的样本,分类器预测为1的概率(即TPRate),要大于真实类别为0而预测类别为1的概率(即FPRate),即y>x。 最理想的情况下,既没有真实类别为1而错分为0的样本——TPRate一直为1,也没有真实类别为0而错分为1的样本——FP rate一直为0,AUC为1,这便是AUC的极大值。
class RocAucMeter(object):
    """Accumulates labels and positive-class scores and tracks the ROC AUC over all of them."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Drop everything accumulated so far and zero the score."""
        # The buffers start with one sample of each class, both scored 0.5
        # (presumably so that roc_auc_score always sees both classes).
        self.y_true = np.array([0, 1])
        self.y_pred = np.array([0.5, 0.5])
        self.score = 0

    def update(self, y_true, y_pred):
        """Append one batch and recompute the AUC over all stored samples.

        ``y_true`` is reduced to class indices with an argmax over axis 1;
        ``y_pred`` is passed through softmax over dim 1 and column 1 is
        kept as the score.
        """
        batch_labels = y_true.cpu().numpy().argmax(axis=1)
        class_probs = nn.functional.softmax(y_pred, dim=1)
        batch_scores = class_probs.detach().cpu().numpy()[:, 1]

        self.y_true = np.concatenate((self.y_true, batch_labels))
        self.y_pred = np.concatenate((self.y_pred, batch_scores))
        self.score = sklearn.metrics.roc_auc_score(
            self.y_true, self.y_pred, labels=np.array([0, 1])
        )

    @property
    def avg(self):
        """The most recently computed AUC (0 right after a reset)."""
        return self.score
class AverageMeter(object):
    """Tracks the latest value together with a running weighted sum, count and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the current value, mean, sum and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest value, weighted as ``n`` observations."""
        self.val = val
        self.count += n
        self.sum += val * n
        # Running mean over every observation seen since the last reset.
        self.avg = self.sum / self.count
from torch import Tensor
def pytorch_cos_sim(a: Tensor, b: Tensor):
    """
    Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j.
    This function can be used as a faster replacement for 1-scipy.spatial.distance.cdist(a,b)

    A 1-D input is treated as a single row vector. Rows with zero norm give a
    similarity of 0 instead of NaN.

    :param a: 1-D or 2-D tensor; each row is one vector
    :param b: 1-D or 2-D tensor; each row is one vector
    :return: Matrix with res[i][j] = cos_sim(a[i], b[j])
    """
    if a.dim() == 1:
        a = a.unsqueeze(0)

    if b.dim() == 1:
        b = b.unsqueeze(0)

    # normalize() divides by max(norm, eps), so an all-zero row stays all-zero
    # rather than becoming NaN through a 0/0 division.
    a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
    b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
    return torch.mm(a_norm, b_norm.transpose(0, 1))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment