Skip to content

Instantly share code, notes, and snippets.

@dustalov
Last active February 22, 2023 16:50
Show Gist options
  • Save dustalov/e6c3b9d3b5b83c81ecd92976e0281d6c to your computer and use it in GitHub Desktop.
Save dustalov/e6c3b9d3b5b83c81ecd92976e0281d6c to your computer and use it in GitHub Desktop.
An implementation of the sigf toolkit for randomization tests in Python 3
#!/usr/bin/env python3
# Module metadata: author, credits, and license of this script.
__author__ = 'Dmitry Ustalov'
__credits__ = 'Sebastian Padó'
__license__ = 'MIT'
# This is an MIT-licensed implementation of the sigf toolkit for randomization tests:
# https://nlpado.de/~sebastian/software/sigf.shtml
import random
import sys
from statistics import mean
def input_counts(f):
    """Read one integer per line from *f*, ignoring blank lines.

    Args:
        f: An iterable of text lines, e.g. an open text file.

    Returns:
        A list with the parsed integers, in input order.

    Raises:
        ValueError: If a non-blank line is not a valid integer literal.
    """
    # Blank lines (typically a trailing empty line at the end of the file)
    # are skipped, as input_tp_fp_fn does, instead of failing on int('').
    stripped_lines = (line.strip() for line in f)
    return [int(text) for text in stripped_lines if text]
def input_tp_fp_fn(f):
    """Read ``tp tp+fp tp+fn`` integer triples from *f*, one per line.

    Fields may be separated by any run of whitespace (spaces or tabs).
    Blank lines are ignored.

    Args:
        f: An iterable of text lines, e.g. an open text file.

    Returns:
        A list of 3-tuples of integers, in input order.

    Raises:
        ValueError: If a non-blank line does not hold exactly three
            fields, or if a field is not a valid integer literal.
    """
    result = []
    for lineno, line in enumerate(f, 1):
        # split() without arguments tolerates tabs and repeated spaces and
        # yields an empty list for blank lines.
        fields = line.split()
        if not fields:
            continue
        if len(fields) != 3:
            # Fail here with a clear message rather than later, inside the
            # scoring function, with an IndexError.
            raise ValueError(
                'line %d: expected 3 fields, got %d' % (lineno, len(fields))
            )
        result.append(tuple(int(count) for count in fields))
    return result
def f1_score(model):
    """Compute the F1 score from counts pooled over all observations.

    Args:
        model: An iterable of indexable observations whose items 0, 1 and 2
            are summed as ``tp``, ``tp+fp`` and ``tp+fn`` respectively.

    Returns:
        The harmonic mean of precision (``tp / (tp+fp)``) and recall
        (``tp / (tp+fn)``), or ``0.`` if any of the three totals is zero.
    """
    total_tp = total_tp_fp = total_tp_fn = 0
    for observation in model:
        total_tp += observation[0]
        total_tp_fp += observation[1]
        total_tp_fn += observation[2]

    # A zero total would make precision or recall zero or undefined.
    if not (total_tp and total_tp_fp and total_tp_fn):
        return 0.

    precision = total_tp / float(total_tp_fp)
    recall = total_tp / float(total_tp_fn)
    return 2 * precision * recall / (precision + recall)
def randomized_test(model1, model2, score, trials, getrandbits_func):
    """Estimate a p-value for the score difference by a randomization test.

    In every trial, each position where the two models differ has its pair
    of observations swapped between the models when ``getrandbits_func(1)``
    returns 1. The trial counts as "better" when the absolute score
    difference of the shuffled models is at least the observed one.

    The observed scores and their absolute difference are reported on
    standard error.

    Args:
        model1: Sequence of observations of the first model.
        model2: Sequence of observations of the second model; must have the
            same length as *model1*.
        score: Callable mapping a list of observations to a number.
        trials: Non-negative number of random shuffles to perform.
        getrandbits_func: Callable taking a bit count and returning a
            random integer, e.g. ``random.getrandbits``.

    Returns:
        The estimated p-value ``(better + 1) / (trials + 1)``.

    Raises:
        ValueError: If the models differ in length or *trials* is negative.
    """
    # Validate once up front with real exceptions: an assert inside the loop
    # is stripped under -O and re-checks an invariant that cannot change.
    if len(model1) != len(model2):
        raise ValueError(
            'models must have the same length: %d != %d'
            % (len(model1), len(model2))
        )
    if trials < 0:
        raise ValueError('trials must be non-negative: %d' % trials)

    # Score each original model exactly once; scoring may be expensive.
    score1, score2 = score(model1), score(model2)
    print('# score(model1) = %f' % score1, file=sys.stderr)
    print('# score(model2) = %f' % score2, file=sys.stderr)
    diff = abs(score1 - score2)
    print('# abs(diff) = %f' % diff, file=sys.stderr)

    # Only positions where the models disagree can change a score when
    # swapped, so random bits are drawn for those positions only.
    uncommon = [
        i for i, (obs1, obs2) in enumerate(zip(model1, model2))
        if obs1 != obs2
    ]

    better = 0
    for _ in range(trials):
        model1_local, model2_local = list(model1), list(model2)
        for i in uncommon:
            if getrandbits_func(1) == 1:
                model1_local[i], model2_local[i] = model2[i], model1[i]
        if abs(score(model1_local) - score(model2_local)) >= diff:
            better += 1

    # Add-one smoothing keeps the estimate strictly positive.
    return (better + 1) / (trials + 1)
# Registry of the supported scores: each name maps to a pair of
# (input-reading function, scoring function).
SCORES = dict(
    mean=(input_counts, mean),
    f1=(input_tp_fp_fn, f1_score),
)
def main():
    """Parse the command line, run the randomization test, print the p-value.

    Command-line arguments:
        --seed / -s: Optional integer seed for a reproducible run.
        --score: Name of the score to compare, one of the SCORES keys
            (default: ``mean``).
        --trials / -n: Number of random shuffles (default: 10 ** 5).
        model1, model2: Paths to the input files of the two models.

    The p-value is printed on standard output. If the two inputs hold a
    different number of observations, the program exits with a usage error.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', '-s', type=int)
    parser.add_argument('--score', choices=SCORES.keys(), default='mean')
    parser.add_argument('--trials', '-n', type=int, default=10 ** 5)
    parser.add_argument('model1', type=argparse.FileType('r'))
    parser.add_argument('model2', type=argparse.FileType('r'))
    args = parser.parse_args()

    if args.seed is None:
        getrandbits_func = random.getrandbits
    else:
        # A dedicated generator keeps seeded runs independent of the
        # module-level random state.
        getrandbits_func = random.Random(args.seed).getrandbits

    reader, score = SCORES[args.score]

    # argparse.FileType opens the files but never closes them; close them
    # as soon as their contents have been read.
    with args.model1, args.model2:
        model1, model2 = reader(args.model1), reader(args.model2)

    # Report mismatched inputs as a usage error; an assert would vanish
    # under -O and otherwise only produce a bare traceback.
    if len(model1) != len(model2):
        parser.error(
            'model1 and model2 must have the same number of observations '
            '(%d != %d)' % (len(model1), len(model2))
        )

    p = randomized_test(model1, model2, score, args.trials, getrandbits_func)
    print('p-value = %f' % p)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
@dustalov
Copy link
Author

dustalov commented Oct 23, 2018

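With --score=f1, each line of an input file holds one observation as three space-separated integers: tp tp+fp tp+fn. With --score=mean (the default), each line holds a single integer.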

$ cat exampleFScore/model1
1 2 1
2 2 2
1 2 2
$ cat exampleFScore/model2
1 2 1
0 2 2
0 2 2
$ ./sigf.py --score=f1 -n 100000 exampleFScore/model{1,2}
# score(model1) = 0.727273
# score(model2) = 0.181818
# abs(diff) = 0.545455
p-value = 0.498935

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment