Get PD Biomarker AUPR scores by task. Requires the original scoring code `LDopaScorer.py`
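Typical usage, assuming the script is saved as e.g. `score_by_task.py` next to `LDopaScorer.py` and Synapse credentials are cached for `synapseclient.login()`: run `python score_by_task.py` for point estimates, or `python score_by_task.py --num-bootstraps 1000` to bootstrap the test set. Results are written to `{phenotype}_scores_by_task.csv` (or `{phenotype}_bootstrap_scores_by_task.csv`) for each of tremor, dyskinesia, and bradykinesia.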
from __future__ import division, print_function

import argparse

import numpy as np
import pandas as pd
import synapseclient as sc

import LDopaScorer
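
# Score each challenge submission on the held-out test set, broken down by
# task group, optionally with bootstrap resampling of the test set to get a
# distribution of weighted AUPR values per task group.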

TRAINING_TABLE = 'syn10495809'
TESTING_TABLE = 'syn10701954'
TESTING_WITH_SCORES_TABLE = 'syn10518012'
SUBMISSION_TABLES = {
    'tremor': 'syn11559815',
    'dyskinesia': 'syn11559820',
    'bradykinesia': 'syn11559823'
}
TASK_GROUPINGS = {
    'drnkg': ('drnkg',),
    'fldng': ('fldng',),
    'ftn': ('ftnr1', 'ftnr2', 'ftnl1', 'ftnl2'),
    'ntblt': ('ntblt',),
    'orgpa': ('orgpa',),
    'ram': ('ramr1', 'ramr2', 'raml1', 'raml2')}
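# Only the lengths of these lists are used below (five severity classes for
# tremor, two for dyskinesia and bradykinesia); the values themselves look
# like per-class case counts but are never read by this script.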
CATEGORY_WEIGHTS = {
    'tremor': [896, 381, 214, 9, 0],
    'dyskinesia': [531, 129],
    'bradykinesia': [990, 419]}

def read_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-bootstraps", type=int)
    args = parser.parse_args()
    return args

def fetch_task_dfhis(syn):
    task_dfhis = {}
    for group in TASK_GROUPINGS:
        train_dfhi, test_dfhi = fetch_group_dfhis(syn, TASK_GROUPINGS[group])
        dfhis = np.append(train_dfhi, test_dfhi)
        task_dfhis[group] = dfhis
    return task_dfhis

def get_table(syn, synId):
    q = syn.tableQuery("select * from {}".format(synId))
    df = q.asDataFrame()
    df = df.set_index("dataFileHandleId", drop=True)
    return df

def count_cases(syn, df, phenotype, group, isTestDataset):
    if isTestDataset:
        # The public test table appears to flag scoreable recordings with the
        # literal string "Score"; after merging on dataFileHandleId, _x holds
        # the actual scores and _y holds that flag.
        test = get_table(syn, TESTING_TABLE)[
            ["tremorScore", "dyskinesiaScore", "bradykinesiaScore"]]
        df = df.merge(test, left_index=True, right_index=True)
        df = df[df['{}Score_y'.format(phenotype)] == "Score"]
        scores_for_task = df.query("task in {}".format(TASK_GROUPINGS[group]))
        scores = scores_for_task['{}Score_x'.format(phenotype)].values
    else:
        scores_for_task = df.query("task in {}".format(TASK_GROUPINGS[group]))
        scores = scores_for_task['{}Score'.format(phenotype)].values
    scores_without_na = scores[pd.notnull(scores)]
    unique, counts = np.unique(scores_without_na, return_counts=True)
    return dict(zip(unique, counts))

def count_test_cases_for_each_task(syn):
    rows = []
    train = get_table(syn, TRAINING_TABLE)
    test = get_table(syn, TESTING_WITH_SCORES_TABLE)
    for group in TASK_GROUPINGS:
        for phenotype in ['tremor', 'dyskinesia', 'bradykinesia']:
            train_counts = count_cases(syn, train, phenotype, group, False)
            test_counts = count_cases(syn, test, phenotype, group, True)
            classes = [0, 1, 2, 3, 4] if phenotype == "tremor" else [0, 1]
            for c in classes:
                rows.append({
                    'taskGroup': group,
                    'phenotype': phenotype,
                    'class': c,
                    'testCases': test_counts.get(c, 0),
                    'trainCases': train_counts.get(c, 0)})
    # Building from a list of dicts avoids repeated DataFrame.append calls.
    task_counts = pd.DataFrame(rows, columns=[
        'taskGroup', 'phenotype', 'class', 'trainCases', 'testCases'])
    return task_counts

def fetch_submissions(syn, phenotype):
    tableId = SUBMISSION_TABLES[phenotype]
    q_submission = syn.tableQuery(
        "select submissionId, dataFileHandleId from {}".format(tableId))
    submission_paths = syn.downloadTableColumns(
        q_submission, "dataFileHandleId")
    return q_submission.asDataFrame(), submission_paths

def fetch_group_dfhis(syn, group):
    query_string = "select dataFileHandleId from {} where task in {}"
    group_string = "('" + "', '".join(group) + "')"
    train_query = query_string.format(TRAINING_TABLE, group_string)
    test_query = query_string.format(TESTING_TABLE, group_string)
    q_train = syn.tableQuery(train_query).asDataFrame()
    q_test = syn.tableQuery(test_query).asDataFrame()
    return q_train.dataFileHandleId.values, q_test.dataFileHandleId.values

def get_weighted_mean(scores, weights):
    # NaN scores contribute nothing to the numerator, but their weights stay
    # in the denominator, so missing per-class AUPRs pull the mean toward 0.
    numer = 0
    denom = sum(weights)
    for w, s in zip(weights, scores):
        if pd.notnull(s):
            numer += w * s
    return numer / denom

def get_weights(phenotype, taskGroup, task_counts, forColumn="testCases"):
    weights = task_counts.query(
        "taskGroup == '{}' and phenotype == '{}'".format(
            taskGroup, phenotype))
    return weights[forColumn].values

def get_bootstrap_weights(dfhis, test_table_with_scores, phenotype):
    # Count how often each severity class appears in the (possibly
    # duplicated) bootstrap sample, filling absent classes with 0.
    test_table_with_scores_filtered = test_table_with_scores.loc[dfhis]
    scores = test_table_with_scores_filtered[
        "{}Score".format(phenotype)].values
    scores_without_na = scores[pd.notnull(scores)]
    unique, counts = np.unique(scores_without_na, return_counts=True)
    class_counts = dict(zip(unique, counts))
    for i in range(len(CATEGORY_WEIGHTS[phenotype])):
        if i not in class_counts:
            class_counts[i] = 0
    return class_counts

def score_all(phenotype, submissions, task_dfhis, task_counts):
    results = {}
    for dfhi in submissions:
        dfhi, path = str(dfhi), str(submissions[dfhi])
        auprc_by_task_group = score(phenotype, path, task_dfhis, task_counts)
        results[dfhi] = auprc_by_task_group
    return results

def score_all_bootstrap(phenotype, submissions, task_dfhis, task_counts,
                        num_bootstraps, test_table_with_scores):
    results = {}
    for dfhi in submissions:
        dfhi, path = str(dfhi), str(submissions[dfhi])
        print(dfhi)
        score_info = score_bootstrap(
            phenotype, path, task_dfhis, task_counts,
            num_bootstraps, test_table_with_scores)
        results[dfhi] = score_info
    return results

def score_bootstrap(phenotype, path, task_dfhis, task_counts,
                    num_bootstraps, test_table_with_scores):
    train_X, test_X, train_y, test_y, index = LDopaScorer.read_data(
        path, phenotype)
    score_info = {}
    for task in task_dfhis:
        relevant_dfhis = task_dfhis[task]
        train_relevant_dfhis = train_X.index.intersection(relevant_dfhis)
        train_X_task = train_X.loc[train_relevant_dfhis]
        train_y_task = train_y.loc[train_relevant_dfhis]
        weights_train = get_weights(phenotype, task, task_counts, "trainCases")
        # Only score classes that actually occur in the training data.
        classes = [i for i in range(len(weights_train))
                   if weights_train[i] > 0]
        if len(train_X_task):
            ensemble = LDopaScorer.train_ensemble(
                train_X_task.values, train_y_task.values)
            test_relevant_dfhis = test_X.index.intersection(relevant_dfhis)
            for i in range(num_bootstraps):
                # Resample (with replacement) until the bootstrap sample
                # contains at least one positive (class 1) case.
                weights_test = {}
                while not weights_test.get(1):
                    test_relevant_dfhis_bootstrap = np.random.choice(
                        np.asarray(test_relevant_dfhis),
                        size=len(test_relevant_dfhis), replace=True)
                    weights_test = get_bootstrap_weights(
                        test_relevant_dfhis_bootstrap,
                        test_table_with_scores, phenotype)
                test_X_task = test_X.loc[test_relevant_dfhis_bootstrap]
                test_y_task = test_y.loc[test_relevant_dfhis_bootstrap]
                results, y_score, y_true = LDopaScorer.getNonLinearInterpAupr(
                    test_X_task, test_y_task, classes, ensemble)
                if phenotype == 'tremor':
                    # `results` aligns with `classes`; expand to all five
                    # severity classes (NaN where untrained) so the pairing
                    # with the weights doesn't shift when a class is skipped.
                    results_by_class = dict(zip(classes, results))
                    weighted_aupr = get_weighted_mean(
                        [results_by_class.get(k, np.nan) for k in range(5)],
                        [weights_test.get(k, 0) for k in range(5)])
                else:
                    weighted_aupr = results[0]
                score_info[(task, i)] = {
                    'aupr': weighted_aupr, 'weights': weights_test}
    return score_info

def score(phenotype, path, task_dfhis, task_counts):
    train_X, test_X, train_y, test_y, index = LDopaScorer.read_data(
        path, phenotype)
    task_scores = {}
    for task in task_dfhis:
        relevant_dfhis = task_dfhis[task]
        train_relevant_dfhis = train_X.index.intersection(relevant_dfhis)
        test_relevant_dfhis = test_X.index.intersection(relevant_dfhis)
        train_X_task = train_X.loc[train_relevant_dfhis]
        test_X_task = test_X.loc[test_relevant_dfhis]
        train_y_task = train_y.loc[train_relevant_dfhis]
        test_y_task = test_y.loc[test_relevant_dfhis]
        weights_train = get_weights(phenotype, task, task_counts, "trainCases")
        weights_test = get_weights(phenotype, task, task_counts, "testCases")
        # Only score classes that actually occur in the training data.
        classes = [i for i in range(len(weights_train))
                   if weights_train[i] > 0]
        if len(train_X_task) and len(test_X_task):
            ensemble = LDopaScorer.train_ensemble(
                train_X_task.values, train_y_task.values)
            results, y_score, y_true = LDopaScorer.getNonLinearInterpAupr(
                test_X_task, test_y_task, classes, ensemble)
            print("results", results)
            if phenotype == 'tremor':
                # `results` aligns with `classes`; expand to all five severity
                # classes (NaN where untrained) so the pairing with
                # weights_test doesn't shift when a class is skipped.
                results_by_class = dict(zip(classes, results))
                weighted_aupr = get_weighted_mean(
                    [results_by_class.get(k, np.nan) for k in range(5)],
                    weights_test)
            else:
                weighted_aupr = results[0]
            task_scores[task] = weighted_aupr
    return task_scores

def write_scores_to_file(scores, phenotype, submissions):
    # Every submission is scored on the same task groups; grab the column
    # order from an arbitrary entry.
    tasks = list(next(iter(scores.values())).keys())
    scores_as_list = []
    for dfhi in scores:
        score_row = np.append([dfhi], [scores[dfhi][t] for t in tasks])
        scores_as_list.append(score_row)
    scores = pd.DataFrame(
        scores_as_list,
        columns=np.append(['dataFileHandleId'], tasks))
    # two hours later, dtypes must match...
    submissions = submissions.dropna()
    scores.dataFileHandleId = scores.dataFileHandleId.astype(int)
    submissions.dataFileHandleId = submissions.dataFileHandleId.astype(int)
    submissions.submissionId = submissions.submissionId.astype(int)
    submissions = submissions.merge(scores, on="dataFileHandleId")
    submissions = submissions.drop("dataFileHandleId", axis=1)
    submissions.to_csv("{}_scores_by_task.csv".format(phenotype),
                       index=False, header=True)
    return scores

def write_bootstrap_scores_to_file(scores, phenotype, submissions):
    scores_as_list = []
    for dfhi in scores:
        for score_info in scores[dfhi]:
            task, bootstrap_num = score_info
            aupr = scores[dfhi][score_info]['aupr']
            weights = scores[dfhi][score_info]['weights']
            class_weights = [weights.get(i, 0) for i in range(5)]
            score_row = ([dfhi, phenotype, task, bootstrap_num, aupr]
                         + class_weights)
            scores_as_list.append(score_row)
    scores = pd.DataFrame(
        scores_as_list,
        columns=['dataFileHandleId', 'phenotype', 'task', 'bootstrap', 'AUPR',
                 'testClassCount_0', 'testClassCount_1',
                 'testClassCount_2', 'testClassCount_3',
                 'testClassCount_4'])
    # two hours later, dtypes must match...
    submissions = submissions.dropna()
    scores.dataFileHandleId = scores.dataFileHandleId.astype(int)
    submissions.dataFileHandleId = submissions.dataFileHandleId.astype(int)
    submissions.submissionId = submissions.submissionId.astype(int)
    submissions = submissions.merge(scores, on="dataFileHandleId")
    submissions = submissions.drop("dataFileHandleId", axis=1)
    submissions.to_csv("{}_bootstrap_scores_by_task.csv".format(phenotype),
                       index=False, header=True)
    return scores

def main():
    args = read_args()
    syn = sc.login()
    task_dfhis = fetch_task_dfhis(syn)
    task_counts = count_test_cases_for_each_task(syn)
    test_table_with_scores = get_table(syn, TESTING_WITH_SCORES_TABLE)
    for phenotype in SUBMISSION_TABLES:
        submissions, submission_paths = fetch_submissions(syn, phenotype)
        if args.num_bootstraps:
            scores = score_all_bootstrap(
                phenotype, submission_paths, task_dfhis, task_counts,
                args.num_bootstraps, test_table_with_scores)
            write_bootstrap_scores_to_file(scores, phenotype, submissions)
        else:
            scores = score_all(
                phenotype, submission_paths, task_dfhis, task_counts)
            write_scores_to_file(scores, phenotype, submissions)


if __name__ == "__main__":
    main()