# thesamovar/publishing_decisions_model.py

## publishing_decisions_model.py
import numpy as np
from scipy.stats import norm

# Assumptions:
# True score of a paper is x normally distributed with mean 0, variance 1
# Want to publish papers where x>z some threshold (e.g. choose top 5% of papers)
# Each reviewer gives a score y=x+eps where eps is normally distributed with mean 0, variance sigma^2
# n reviewers, take the mean score yn
# Publish the paper if yn>z
# Compute each of the following:
# - True positive rate is chance the paper is published (yn>z) when it "should" have been (x>z)
# - False negative rate is chance the paper is rejected (yn<z) when it should have been published
# - Fraction of published papers which should have been published

# Monte Carlo simulation of the publishing model described in the assumptions
# above: draw true paper scores, add independent reviewer noise, average the
# reviewer scores, and compare the accept decision against the true score.

# defining parameters
repeats = 10000 # number of simulated papers
n = 10 # number of reviewers
sigma = 1.3 # reviewer noise (standard deviation of eps)
threshold_fraction = 0.05 # fraction of papers to accept

# computed parameters
# Score z that a standard normal variable exceeds with probability threshold_fraction.
threshold_score = norm.isf(threshold_fraction)
x = np.random.randn(repeats) # true score of each paper
# randn draws samples with unit standard deviation, so the noise must be scaled
# by sigma (not sigma**2) to have variance sigma^2 as stated in the assumptions.
e = np.random.randn(repeats, n)*sigma
y = x[:, None]+e # y[i, j] is the score reviewer j gives paper i
yn = np.mean(y, axis=1) # mean reviewer score of each paper
# Under this model two reviewers' scores have correlation 1/(1+sigma^2).
print(f"Pairwise reviewer score correlation: {100*np.corrcoef(y, rowvar=False)[0,1]:.1f}%")
should_publish = x>threshold_score # papers whose true score clears the bar
published = yn>threshold_score # papers whose mean review score clears the bar
# The mean of a boolean array is the fraction of True entries, which avoids
# looping over numpy arrays with the (slow) Python builtin sum.
tp = np.mean(yn[should_publish]>threshold_score)
fn = np.mean(yn[should_publish]<threshold_score)
fi = np.mean(x[published]>threshold_score)
print(f"True positive rate (published correctly): {100*tp:.1f}%")
print(f"False negative rate (unpublished incorrectly): {100*fn:.1f}%")
print(f"Fraction of published papers that are correctly published: {100*fi:.1f}%")
	import numpy as np
	from scipy.stats import norm

	# Assumptions:
	# True score of a paper is x normally distributed with mean 0, variance 1
	# Want to publish papers where x>z some threshold (e.g. choose top 5% of papers)
	# Each reviewer gives a score y=x+eps where eps is normally distributed with mean 0, variance sigma^2
	# n reviewers, take the mean score yn
	# Publish the paper if yn>z
	# Compute each of the following:
	# - True positive rate is chance the paper is published (yn>z) when it "should" have been (x>z)
	# - False negative rate is chance the paper is rejected (yn<z) when it should have been published
	# - Fraction of published papers which should have been published

	# Monte Carlo simulation of the publishing model described in the assumptions
	# above: draw true paper scores, add independent reviewer noise, average the
	# reviewer scores, and compare the accept decision against the true score.

	# defining parameters
	repeats = 10000 # number of simulated papers
	n = 10 # number of reviewers
	sigma = 1.3 # reviewer noise (standard deviation of eps)
	threshold_fraction = 0.05 # fraction of papers to accept

	# computed parameters
	# Score z that a standard normal variable exceeds with probability threshold_fraction.
	threshold_score = norm.isf(threshold_fraction)
	x = np.random.randn(repeats) # true score of each paper
	# randn draws samples with unit standard deviation, so the noise must be scaled
	# by sigma to have variance sigma^2 as stated in the assumptions. (The
	# multiplication operators on this line had been lost, leaving invalid syntax.)
	e = np.random.randn(repeats, n)*sigma
	y = x[:, None]+e # y[i, j] is the score reviewer j gives paper i
	yn = np.mean(y, axis=1) # mean reviewer score of each paper
	# Under this model two reviewers' scores have correlation 1/(1+sigma^2).
	print(f"Pairwise reviewer score correlation: {100*np.corrcoef(y, rowvar=False)[0,1]:.1f}%")
	should_publish = x>threshold_score # papers whose true score clears the bar
	published = yn>threshold_score # papers whose mean review score clears the bar
	# The mean of a boolean array is the fraction of True entries, which avoids
	# looping over numpy arrays with the (slow) Python builtin sum.
	tp = np.mean(yn[should_publish]>threshold_score)
	fn = np.mean(yn[should_publish]<threshold_score)
	fi = np.mean(x[published]>threshold_score)
	print(f"True positive rate (published correctly): {100*tp:.1f}%")
	print(f"False negative rate (unpublished incorrectly): {100*fn:.1f}%")
	print(f"Fraction of published papers that are correctly published: {100*fi:.1f}%")