# Rocketknight1/bias_initialization.py

## bias_initialization.py
# Set a classifier head's bias vector to the log of the empirical label
# frequencies.
#
# Two names are expected to be defined by the surrounding code:
#   labels - a possibly multidimensional array of categories / token indices
#   model  - a model whose output head exposes its bias as
#            `model.classifier.bias` (the exact attribute name depends on the
#            specific model head you're using)

import numpy as np

# axis=None flattens multidimensional arrays before counting.
# NOTE(review): np.unique only reports values that actually occur in `labels`
# (in sorted order), so this presumably relies on every class index appearing
# at least once; if a class is absent the result will be shorter than, and
# misaligned with, the bias vector - confirm against your dataset.
_, label_counts = np.unique(labels, axis=None, return_counts=True)

# For multi-label classification you should normalize by the number of samples
# instead.
# `np.float` was removed from NumPy (1.24); `np.float64` is the same dtype.
label_frequencies = label_counts.astype(np.float64) / np.sum(label_counts)

label_logprobs = np.log(label_frequencies)

# Now assign the values in label_logprobs to the bias vector. Only one of the
# two framework-specific forms can apply to a given model, so pick it based on
# what the bias object supports.
bias = model.classifier.bias

if hasattr(bias, "assign"):
    # TensorFlow: variables are updated in place through `assign`.
    bias.assign(label_logprobs)
else:
    # PyTorch: imported here so TensorFlow users do not need torch installed.
    import torch

    # Convert the NumPy array to a tensor first; `copy_` then casts it to the
    # bias's dtype/device. no_grad keeps the in-place write out of autograd.
    with torch.no_grad():
        bias.copy_(torch.from_numpy(label_logprobs))