An implementation of the sigf toolkit for randomization tests in Python 3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3

__author__ = 'Dmitry Ustalov'
__credits__ = 'Sebastian Padó'
__license__ = 'MIT'

# This is an MIT-licensed implementation of the sigf toolkit for randomization tests:
# https://nlpado.de/~sebastian/software/sigf.shtml

import random
import sys
from statistics import mean
def input_counts(f):
    """Read one integer observation per line from *f*.

    Args:
        f: An iterable of text lines, e.g. an open text file.

    Returns:
        A list of ints, one per non-blank line, in input order.

    Raises:
        ValueError: If a non-blank line cannot be parsed as an integer.
    """
    # Blank lines (typically a trailing newline at the end of the file) are
    # skipped rather than fed to int(''), which would raise ValueError.
    stripped = (line.strip() for line in f)
    return [int(value) for value in stripped if value]
def input_tp_fp_fn(f):
    """Read per-observation count triples from *f*.

    Each non-blank line holds three integers separated by whitespace; the
    scoring code in this file reads them as ``tp``, ``tp+fp`` and ``tp+fn``.

    Args:
        f: An iterable of text lines, e.g. an open text file.

    Returns:
        A list of 3-tuples of ints, one per non-blank line, in input order.

    Raises:
        ValueError: If a non-blank line does not contain exactly three
            fields, or a field is not a valid integer.
    """
    result = []
    for line_number, line in enumerate(f, start=1):
        # split() with no separator accepts tabs and runs of spaces and
        # yields an empty list for blank lines.
        fields = line.split()
        if not fields:
            continue
        if len(fields) != 3:
            # Fail here with a pointer to the offending line instead of
            # letting a short row surface later as an IndexError in scoring.
            raise ValueError(
                'line %d: expected 3 whitespace-separated counts, got %d'
                % (line_number, len(fields)))
        result.append(tuple(int(count) for count in fields))
    return result
def f1_score(model):
    """Compute the F1 score from counts pooled over all observations.

    Args:
        model: An iterable of observations; each observation is indexable
            with element 0 = tp, element 1 = tp+fp, element 2 = tp+fn.

    Returns:
        The F1 score as a float, or 0.0 when any of the pooled totals is
        zero (including an empty *model*).
    """
    # Accumulate in a single pass so that one-shot iterables work too.
    tp = tp_fp = tp_fn = 0
    for obs in model:
        tp += obs[0]
        tp_fp += obs[1]
        tp_fn += obs[2]

    if tp == 0 or tp_fp == 0 or tp_fn == 0:
        return 0.0

    # With P = tp / tp_fp and R = tp / tp_fn, 2PR / (P + R) simplifies to
    # 2 * tp / (tp_fp + tp_fn). The single division avoids the intermediate
    # rounding of P and R, so equal pooled counts always give equal floats.
    return 2 * tp / (tp_fp + tp_fn)
def randomized_test(model1, model2, score, trials, getrandbits_func):
    """Estimate a p-value for the score difference of two paired models.

    In every trial, each position where the two models differ is swapped
    between them when ``getrandbits_func(1)`` returns 1. The trial counts as
    "better" when the absolute score difference of the shuffled pair is at
    least the observed one.

    Args:
        model1: Sequence of observations for the first model.
        model2: Sequence of observations for the second model; must have the
            same length as *model1*.
        score: Callable mapping a list of observations to a number.
        trials: Number of random shuffles to perform (non-negative).
        getrandbits_func: Callable taking a bit count and returning an int,
            e.g. ``random.getrandbits``; called as ``getrandbits_func(1)``.

    Returns:
        ``(better + 1) / (trials + 1)``.

    Raises:
        ValueError: If the models differ in length or *trials* is negative.

    The two observed scores and their absolute difference are written to
    standard error.
    """
    if len(model1) != len(model2):
        raise ValueError('models must have the same number of observations: '
                         '%d != %d' % (len(model1), len(model2)))
    if trials < 0:
        raise ValueError('trials must be non-negative, got %d' % trials)

    # Score each observed model once and reuse the values.
    score1 = score(model1)
    print('# score(model1) = %f' % score1, file=sys.stderr)
    score2 = score(model2)
    print('# score(model2) = %f' % score2, file=sys.stderr)
    diff = abs(score1 - score2)
    print('# abs(diff) = %f' % diff, file=sys.stderr)

    # Swapping identical observations cannot change either score, so only
    # the positions where the models disagree take part in the shuffle.
    uncommon = [i for i, (obs1, obs2) in enumerate(zip(model1, model2))
                if obs1 != obs2]

    better = 0
    for _ in range(trials):
        model1_local, model2_local = list(model1), list(model2)
        # Exactly one random bit is drawn per differing position, in index
        # order, so a seeded generator reproduces the same shuffles.
        for i in uncommon:
            if getrandbits_func(1) == 1:
                model1_local[i], model2_local[i] = model2[i], model1[i]
        diff_local = abs(score(model1_local) - score(model2_local))
        if diff_local >= diff:
            better += 1

    return (better + 1) / (trials + 1)
# Every element of SCORES is a pair of input-reading function and scoring function.
# The keys are the names accepted by the --score command-line option.
SCORES = {
    'mean': (input_counts, mean),
    'f1': (input_tp_fp_fn, f1_score),
}
def main():
    """Command-line entry point: read two models and print the p-value.

    Options: ``--seed/-s`` seeds a private random generator for reproducible
    runs, ``--score`` selects an entry of SCORES (default ``mean``), and
    ``--trials/-n`` sets the number of shuffles (default 10 ** 5). The two
    positional arguments are the model files.

    Invalid input (negative trial count, models of different length) is
    reported through ``parser.error``, which prints a usage message and
    exits with status 2.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', '-s', type=int)
    parser.add_argument('--score', choices=SCORES.keys(), default='mean')
    parser.add_argument('--trials', '-n', type=int, default=10 ** 5)
    parser.add_argument('model1', type=argparse.FileType('r'))
    parser.add_argument('model2', type=argparse.FileType('r'))
    args = parser.parse_args()

    if args.trials < 0:
        parser.error('--trials must be non-negative')

    if args.seed is None:
        # No seed given: use the module-level generator.
        getrandbits_func = random.getrandbits
    else:
        # A private generator keeps seeded runs reproducible.
        rng = random.Random(args.seed)
        getrandbits_func = rng.getrandbits

    reader, score = SCORES[args.score]

    # argparse.FileType opens the files eagerly; close them once read.
    with args.model1, args.model2:
        model1, model2 = reader(args.model1), reader(args.model2)

    # Explicit check instead of assert, which is stripped under ``python -O``.
    if len(model1) != len(model2):
        parser.error('models differ in length: %d != %d'
                     % (len(model1), len(model2)))

    p = randomized_test(model1, model2, score, args.trials, getrandbits_func)

    print('p-value = %f' % p)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
Author
dustalov
commented
Oct 23, 2018
Input files have the following format, one observation per line: `tp tp+fp tp+fn`.
$ cat exampleFScore/model1
1 2 1
2 2 2
1 2 2
$ cat exampleFScore/model2
1 2 1
0 2 2
0 2 2
$ ./sigf.py --score=f1 -n 100000 exampleFScore/model{1,2}
# score(model1) = 0.727273
# score(model2) = 0.181818
# abs(diff) = 0.545455
p-value = 0.498935
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment