Skip to content

Instantly share code, notes, and snippets.

@thesamovar
Created August 17, 2022 16:11
Show Gist options
  • Save thesamovar/0c26baa9ca6c8ccfd2df46e8e32c60e9 to your computer and use it in GitHub Desktop.
Save thesamovar/0c26baa9ca6c8ccfd2df46e8e32c60e9 to your computer and use it in GitHub Desktop.
Publishing decisions model
import numpy as np
from scipy.stats import norm
# Assumptions:
# True score of a paper is x normally distributed with mean 0, variance 1
# Want to publish papers where x>z some threshold (e.g. choose top 5% of papers)
# Each reviewer gives a score y=x+eps where eps is normally distributed with mean 0, variance sigma^2
# n reviewers, take the mean score yn
# Publish the paper if yn>z
# Compute each of the following:
# - True positive rate is chance the paper is published (yn>z) when it "should" have been (x>z)
# - False negative rate is chance the paper is rejected (yn<z) when it should have been published
# - Fraction of published papers which should have been published
# defining parameters
# defining parameters
repeats = 10000  # number of simulated papers
n = 10  # number of reviewers
sigma = 1.3  # reviewer noise: standard deviation of each reviewer's error
threshold_fraction = 0.05  # fraction of papers to accept
# computed parameters
# Inverse survival function of the standard normal: the score z such that
# P(x > z) == threshold_fraction.
threshold_score = norm.isf(threshold_fraction)
x = np.random.randn(repeats)  # true score of each paper
# Reviewer errors, one row per paper and one column per reviewer. A unit
# normal must be scaled by sigma (not sigma**2) to have variance sigma^2,
# as stated in the assumptions.
e = np.random.randn(repeats, n) * sigma
y = x[:, None] + e  # individual reviewer scores
yn = np.mean(y, axis=1)  # mean reviewer score per paper
print(f"Pairwise reviewer score correlation: {100*np.corrcoef(y, rowvar=False)[0,1]:.1f}%")
# Boolean masks computed once: papers that deserve publication (true score
# above threshold) and papers that actually get published (mean reviewer
# score above threshold).
deserving = x > threshold_score
accepted = yn > threshold_score
# The mean of a boolean array is the fraction of True entries; this is a
# vectorised replacement for builtin sum(...)/sum(...) over numpy arrays.
tp = np.mean(yn[deserving] > threshold_score)
fn = np.mean(yn[deserving] < threshold_score)
fi = np.mean(x[accepted] > threshold_score)
print(f"True positive rate (published correctly): {100*tp:.1f}%")
print(f"False negative rate (unpublished incorrectly): {100*fn:.1f}%")
print(f"Fraction of published papers that are correctly published: {100*fi:.1f}%")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment