Skip to content

Instantly share code, notes, and snippets.

@vict0rsch
Last active March 16, 2021 03:17
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save vict0rsch/ce34af00a425fb322a2622de483ade9e to your computer and use it in GitHub Desktop.
Save vict0rsch/ce34af00a425fb322a2622de483ade9e to your computer and use it in GitHub Desktop.
Streaming and Multilabel F1 score in Tensorflow
# From my blog post: http://vict0rsch.github.io/2018/06/06/tensorflow-streaming-multilabel-f1/
import tensorflow as tf
import numpy as np
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import array_ops
from tensorflow.python.framework import ops
from sklearn.metrics import f1_score
def metric_variable(shape, dtype, validate_shape=True, name=None):
"""Create variable in `GraphKeys.(LOCAL|METRIC_VARIABLES`) collections.
from https://github.com/tensorflow/tensorflow/blob/r1.8/tensorflow/python/ops/metrics_impl.py
"""
return variable_scope.variable(
lambda: array_ops.zeros(shape, dtype),
trainable=False,
collections=[ops.GraphKeys.LOCAL_VARIABLES, ops.GraphKeys.METRIC_VARIABLES],
validate_shape=validate_shape,
name=name,
)
def streaming_counts(y_true, y_pred, num_classes):
"""Computes the TP, FP and FN counts for the micro and macro f1 scores.
The weighted f1 score can be inferred from the macro f1 score provided
we compute the weights also.
This function also defines the update ops to these counts
Args:
y_true (Tensor): 2D Tensor representing the target labels
y_pred (Tensor): 2D Tensor representing the predicted labels
num_classes (int): number of possible classes
Returns:
tuple: the first element in the tuple is itself a tuple grouping the counts,
the second element is the grouped update op.
"""
# Weights for the weighted f1 score
weights = metric_variable(
shape=[num_classes], dtype=tf.int64, validate_shape=False, name="weights"
)
# Counts for the macro f1 score
tp_mac = metric_variable(
shape=[num_classes], dtype=tf.int64, validate_shape=False, name="tp_mac"
)
fp_mac = metric_variable(
shape=[num_classes], dtype=tf.int64, validate_shape=False, name="fp_mac"
)
fn_mac = metric_variable(
shape=[num_classes], dtype=tf.int64, validate_shape=False, name="fn_mac"
)
# Counts for the micro f1 score
tp_mic = metric_variable(
shape=[], dtype=tf.int64, validate_shape=False, name="tp_mic"
)
fp_mic = metric_variable(
shape=[], dtype=tf.int64, validate_shape=False, name="fp_mic"
)
fn_mic = metric_variable(
shape=[], dtype=tf.int64, validate_shape=False, name="fn_mic"
)
# Update ops, as in the previous section:
# - Update ops for the macro f1 score
up_tp_mac = tf.assign_add(tp_mac, tf.count_nonzero(y_pred * y_true, axis=0))
up_fp_mac = tf.assign_add(fp_mac, tf.count_nonzero(y_pred * (y_true - 1), axis=0))
up_fn_mac = tf.assign_add(fn_mac, tf.count_nonzero((y_pred - 1) * y_true, axis=0))
# - Update ops for the micro f1 score
up_tp_mic = tf.assign_add(tp_mic, tf.count_nonzero(y_pred * y_true, axis=None))
up_fp_mic = tf.assign_add(
fp_mic, tf.count_nonzero(y_pred * (y_true - 1), axis=None)
)
up_fn_mic = tf.assign_add(
fn_mic, tf.count_nonzero((y_pred - 1) * y_true, axis=None)
)
# Update op for the weights, just summing
up_weights = tf.assign_add(weights, tf.reduce_sum(y_true, axis=0))
# Grouping values
counts = (tp_mac, fp_mac, fn_mac, tp_mic, fp_mic, fn_mic, weights)
updates = tf.group(
up_tp_mic, up_fp_mic, up_fn_mic, up_tp_mac, up_fp_mac, up_fn_mac, up_weights
)
return counts, updates
def streaming_f1(counts):
"""Computes the f1 scores from the TP, FP and FN counts
Args:
counts (tuple): macro and micro counts, and weights in the end
Returns:
tuple(Tensor): The 3 tensors representing the micro, macro and weighted
f1 score
"""
# unpacking values
tp_mac, fp_mac, fn_mac, tp_mic, fp_mic, fn_mic, weights = counts
# normalize weights
weights /= tf.reduce_sum(weights)
# computing the micro f1 score
prec_mic = tp_mic / (tp_mic + fp_mic)
rec_mic = tp_mic / (tp_mic + fn_mic)
f1_mic = 2 * prec_mic * rec_mic / (prec_mic + rec_mic)
f1_mic = tf.reduce_mean(f1_mic)
# computing the macro and weighted f1 score
prec_mac = tp_mac / (tp_mac + fp_mac)
rec_mac = tp_mac / (tp_mac + fn_mac)
f1_mac = 2 * prec_mac * rec_mac / (prec_mac + rec_mac)
f1_wei = tf.reduce_sum(f1_mac * weights)
f1_mac = tf.reduce_mean(f1_mac)
return f1_mic, f1_mac, f1_wei
def tf_f1_score(y_true, y_pred):
"""Computes 3 different f1 scores, micro macro
weighted.
micro: f1 score accross the classes, as 1
macro: mean of f1 scores per class
weighted: weighted average of f1 scores per class,
weighted from the support of each class
Args:
y_true (Tensor): labels, with shape (batch, num_classes)
y_pred (Tensor): model's predictions, same shape as y_true
Returns:
tupe(Tensor): (micro, macro, weighted)
tuple of the computed f1 scores
"""
f1s = [0, 0, 0]
y_true = tf.cast(y_true, tf.float64)
y_pred = tf.cast(y_pred, tf.float64)
for i, axis in enumerate([None, 0]):
TP = tf.count_nonzero(y_pred * y_true, axis=axis)
FP = tf.count_nonzero(y_pred * (y_true - 1), axis=axis)
FN = tf.count_nonzero((y_pred - 1) * y_true, axis=axis)
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1 = 2 * precision * recall / (precision + recall)
f1s[i] = tf.reduce_mean(f1)
weights = tf.reduce_sum(y_true, axis=0)
weights /= tf.reduce_sum(weights)
f1s[2] = tf.reduce_sum(f1 * weights)
micro, macro, weighted = f1s
return micro, macro, weighted
def alter_data(_data):
"""Adds noise to the data to simulate predictions.
Each label for each sample has 20% chance of being flipped
Args:
_data (np.array): true values to perturb
Returns:
np.array: predictions
"""
data = _data.copy()
new_data = []
for d in data:
for i, l in enumerate(d):
if np.random.rand() < 0.2:
d[i] = (d[i] + 1) % 2
new_data.append(d)
return np.array(new_data)
def get_data():
"""Generate random multilabel data:
y_true and y_pred are one-hot arrays, but since it's a multi-label setting,
there may be several `1` per line.
Returns:
tuple: y_true, y_pred
"""
# Number of different classes
num_classes = 10
classes = list(range(num_classes))
# Numberof samples in synthetic dataset
examples = 10000
# Max number of labels per sample. Minimum is 1
max_labels = 5
class_probabilities = np.array(
list(6 * np.exp(-i * 5 / num_classes) + 1 for i in range(num_classes))
)
class_probabilities /= class_probabilities.sum()
labels = [
np.random.choice(
classes,
# number of labels for this sample
size=np.random.randint(1, max_labels),
p=class_probabilities, # Probability of drawing each class
replace=False, # A class can only occure once
)
for _ in range(examples) # Do it `examples` times
]
y_true = np.zeros((examples, num_classes)).astype(np.int64)
for i, l in enumerate(labels):
y_true[i][l] = 1
y_pred = alter_data(y_true)
return y_true, y_pred
if __name__ == "__main__":
np.random.seed(0)
y_true, y_pred = get_data()
num_classes = y_true.shape[-1]
bs = 100
t = tf.placeholder(tf.int64, [None, None], "y_true")
p = tf.placeholder(tf.int64, [None, None], "y_pred")
tf_f1 = tf_f1_score(t, p)
counts, update = streaming_counts(t, p, num_classes)
streamed_f1 = streaming_f1(counts)
with tf.Session() as sess:
tf.local_variables_initializer().run()
mic, mac, wei = sess.run(tf_f1, feed_dict={t: y_true, p: y_pred})
print("{:40}".format("\nTotal, overall f1 scores: "), mic, mac, wei)
for i in range(len(y_true) // bs):
y_t = y_true[i * bs : (i + 1) * bs].astype(np.int64)
y_p = y_pred[i * bs : (i + 1) * bs].astype(np.int64)
_ = sess.run(update, feed_dict={t: y_t, p: y_p})
mic, mac, wei = [f.eval() for f in streamed_f1]
print("{:40}".format("\nStreamed, batch-wise f1 scores:"), mic, mac, wei)
mic = f1_score(y_true, y_pred, average="micro")
mac = f1_score(y_true, y_pred, average="macro")
wei = f1_score(y_true, y_pred, average="weighted")
print("{:40}".format("\nFor reference, scikit f1 scores:"), mic, mac, wei)
@vict0rsch
Copy link
Author

Output:
(micro, macro, weighted)

Total, overall f1 scores:               0.665699032365699 0.6241802918567532 0.686824189759798

Streamed, batch-wise f1 scores:         0.665699032365699 0.6241802918567532 0.686824189759798

For reference, scikit f1 scores:        0.665699032365699 0.6241802918567531 0.6868241897597981

@dipanjan
Copy link

dipanjan commented Jun 2, 2019

There's type incompatibility in this code: TypeError: Input 'y' of 'Mul' Op has type float64 that does not match type int64 of argument 'x'.

@vict0rsch
Copy link
Author

vict0rsch commented Jun 3, 2019

What version of tf do you have? it used to work with earlier versions but maybe they had some automating casting which they don't do anymore

Anyway looks like you're just a tf.cast(..., tf.float/int 64) away from smooth running :)

https://www.tensorflow.org/api_docs/python/tf/dtypes/cast

Works fine on 1.13.1

@dipanjan
Copy link

dipanjan commented Jun 3, 2019

Interesting. I am also on 1.13.1

@vict0rsch
Copy link
Author

vict0rsch commented Jun 3, 2019

Let's move to stackoverflow, this is not the place for debugging IMHO, open a question there if you want to

solved: https://stackoverflow.com/a/56425424/3867406

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment