flashton2003/roc_curve_ct.py

## roc_curve_ct.py
# a list of tuples, one tuple per sample
# within the tuple is (Ct value, percentage of genome covered at 20x)
all_samples = [(35.7, 0), (32.7, 0), (27.6, 0), (29.8, 0), (31.2, 0), (30.6, 0), (29.8, 0), (31.4, 0), (27.6, 0), (30, 0), (30.8, 0), (29.9, 0), (28.7, 0), (20.8, 0), (23.1, 0), (28.2, 0), (32.3, 0), (27.2, 2.33), (29, 3.33), (28.7, 6.25), (29.3, 6.6), (25.8, 9.38), (36.6, 9.7), (27.5, 13.16), (14.5, 15.17), (25.8, 21.84), (22.6, 24.02), (25.9, 25.77), (26.2, 26.18), (27.5, 31.05), (28.4, 35.62), (28.6, 42.34), (28.3, 44.2), (25.1, 44.92), (28.3, 45.42), (24.9, 47.9), (28.8, 48.85), (27.3, 49.63), (30.2, 51.79), (29.6, 52.48), (27, 56.45), (31.5, 63.44), (23.8, 63.89), (16.1, 63.9), (22.3, 63.9), (17.1, 63.91), (31.5, 67.75), (30.6, 72.76), (24.9, 73.66), (15.2, 76.66), (24.6, 77.6), (13.1, 77.69), (26.2, 82.32), (26.9, 83.05), (29.7, 84.68), (26.7, 84.99), (29.5, 85.36), (14.3, 86.2), (26.2, 86.73), (13.8, 87.73), (23.3, 88.24), (30.6, 89.36), (24.4, 89.79), (25.7, 90.14), (25.9, 90.3), (24.6, 90.71), (28.6, 91.34), (26.6, 91.7), (14.9, 91.99), (23.6, 92.22), (18, 92.28), (18.3, 92.43), (21.5, 92.51), (23.6, 92.6), (23.9, 92.6), (26.1, 92.61), (26, 92.61), (22.8, 92.62), (24.6, 92.64), (23.5, 92.65), (26.1, 94.04), (22.3, 94.05), (24.5, 94.05), (17.4, 94.06), (14, 94.09), (24.2, 94.46), (26.6, 94.65), (13.5, 94.66), (20.7, 94.68), (22.2, 94.69), (23.1, 94.8), (27, 94.82), (23.8, 94.83), (23, 94.84), (24.3, 94.84), (27.7, 94.84), (21.5, 94.85), (27.6, 94.85), (25.9, 94.86), (22.5, 94.86), (17, 94.86), (26, 94.87), (23.1, 94.96), (25.7, 95.04), (23.9, 95.09), (23.5, 96.53), (18.6, 96.94), (16.4, 96.94), (19.5, 97), (18.5, 97.01), (22.4, 97.02), (21.1, 97.02), (23, 97.13), (15.9, 97.13), (21, 97.15), (13.9, 97.22), (17.6, 97.23), (21.3, 97.26), (22.8, 97.26), (23.2, 97.26), (15.1, 97.26), (26, 97.27), (24.5, 97.27), (21.7, 97.27), (18.1, 97.27), (15.4, 97.27), (17.1, 97.27), (16, 97.27), (16.1, 97.27), (15.6, 97.27), (17.4, 97.27), (12.2, 97.27), (15.8, 97.27), (21.4, 97.27), (19.7, 97.27), (15.4, 97.27), (17.1, 97.27), (17.2, 97.27), (24.6, 97.27), (19.5, 97.27), (18.7, 97.27), (21.6, 97.27), (19.9, 97.27), (25.3, 97.27), (15.1, 97.28), (20, 97.28), (20.5, 97.28), (21, 97.28), (15.6, 97.28), (19.1, 97.28), (15.3, 97.28), (15.8, 97.28), (20.8, 97.28), (18.1, 97.28), (17.8, 97.28), (17.6, 97.28), (17.8, 97.28), (16.2, 97.28), (19.8, 97.28), (16.6, 97.28), (17, 97.28), (16.6, 97.28), (19.9, 97.28), (18, 97.28), (14.1, 97.28), (17.2, 97.28), (16.8, 97.28), (14.9, 97.28), (15.8, 97.28), (17.4, 97.28), (16.5, 97.28), (17.3, 97.28), (19.1, 97.29), (15.3, 97.29), (14.1, 97.29), (17.2, 97.29), (21.2, 98.61), (13.5, 99.13), (20.9, 99.2), (21.9, 99.29), (13.5, 99.36), (24.4, 99.42), (13, 99.42), (18.2, 99.43), (24.6, 99.43), (13.4, 99.43), (15.9, 99.43), (18.1, 99.43), (19.3, 99.43), (15.6, 99.43), (19.3, 99.44), (18.8, 99.44), (21.7, 99.44), (18.6, 99.44), (19.6, 99.44), (14.5, 99.44), (14.9, 99.44), (14.9, 99.44), (17.1, 99.44), (19.8, 99.44), (19.5, 99.44), (20.5, 99.44), (19, 99.44), (14.3, 99.44), (18.7, 99.45), (12, 99.45), (20, 99.45), (30.8, 0), (31.4, 0), (14.9, 0), (24, 0), (27.5, 0), (29.4, 0), (19.6, 0), (31.9, 0)]


def roc_curve(d):
    output = {}
    # iterate over a range of values which represent the Ct thresholds you're
    # interested in testing
    for i in range(15, 40):
        below_threshold = [x for x in d if x[0] < i]
        above_threshold = [x for x in d if x[0] >= i]
        true_positives = len([x for x in below_threshold if x[1] >= 70])
        false_positives = len([x for x in below_threshold if x[1] < 70]) # fp is being below the threshold when you didn't produce good genome
        true_negatives = len([x for x in above_threshold if x[1] < 70])
        false_negatives = len([x for x in above_threshold if x[1] >= 70])
        tpr = true_positives / (true_positives + false_negatives)
        fpr = false_positives / (true_negatives + false_positives)
        output[i] = (tpr, fpr)
        # over_70 = len([x for x in d if x[1] >= 70 and x[0] < i])
        # under_70 = len([x for x in d if x[1] < 70 and x[0] < i])
        # output[i] = (under_70, over_70)
    for i in range(15, 40):
        print(i, output[i][0], output[i][1], sep = '\t')

roc_curve(all_samples)
	# a list of tuples, one tuple per sample
	# within the tuple is (Ct value, percentage of genome covered at 20x)
	all_samples = [(35.7, 0), (32.7, 0), (27.6, 0), (29.8, 0), (31.2, 0), (30.6, 0), (29.8, 0), (31.4, 0), (27.6, 0), (30, 0), (30.8, 0), (29.9, 0), (28.7, 0), (20.8, 0), (23.1, 0), (28.2, 0), (32.3, 0), (27.2, 2.33), (29, 3.33), (28.7, 6.25), (29.3, 6.6), (25.8, 9.38), (36.6, 9.7), (27.5, 13.16), (14.5, 15.17), (25.8, 21.84), (22.6, 24.02), (25.9, 25.77), (26.2, 26.18), (27.5, 31.05), (28.4, 35.62), (28.6, 42.34), (28.3, 44.2), (25.1, 44.92), (28.3, 45.42), (24.9, 47.9), (28.8, 48.85), (27.3, 49.63), (30.2, 51.79), (29.6, 52.48), (27, 56.45), (31.5, 63.44), (23.8, 63.89), (16.1, 63.9), (22.3, 63.9), (17.1, 63.91), (31.5, 67.75), (30.6, 72.76), (24.9, 73.66), (15.2, 76.66), (24.6, 77.6), (13.1, 77.69), (26.2, 82.32), (26.9, 83.05), (29.7, 84.68), (26.7, 84.99), (29.5, 85.36), (14.3, 86.2), (26.2, 86.73), (13.8, 87.73), (23.3, 88.24), (30.6, 89.36), (24.4, 89.79), (25.7, 90.14), (25.9, 90.3), (24.6, 90.71), (28.6, 91.34), (26.6, 91.7), (14.9, 91.99), (23.6, 92.22), (18, 92.28), (18.3, 92.43), (21.5, 92.51), (23.6, 92.6), (23.9, 92.6), (26.1, 92.61), (26, 92.61), (22.8, 92.62), (24.6, 92.64), (23.5, 92.65), (26.1, 94.04), (22.3, 94.05), (24.5, 94.05), (17.4, 94.06), (14, 94.09), (24.2, 94.46), (26.6, 94.65), (13.5, 94.66), (20.7, 94.68), (22.2, 94.69), (23.1, 94.8), (27, 94.82), (23.8, 94.83), (23, 94.84), (24.3, 94.84), (27.7, 94.84), (21.5, 94.85), (27.6, 94.85), (25.9, 94.86), (22.5, 94.86), (17, 94.86), (26, 94.87), (23.1, 94.96), (25.7, 95.04), (23.9, 95.09), (23.5, 96.53), (18.6, 96.94), (16.4, 96.94), (19.5, 97), (18.5, 97.01), (22.4, 97.02), (21.1, 97.02), (23, 97.13), (15.9, 97.13), (21, 97.15), (13.9, 97.22), (17.6, 97.23), (21.3, 97.26), (22.8, 97.26), (23.2, 97.26), (15.1, 97.26), (26, 97.27), (24.5, 97.27), (21.7, 97.27), (18.1, 97.27), (15.4, 97.27), (17.1, 97.27), (16, 97.27), (16.1, 97.27), (15.6, 97.27), (17.4, 97.27), (12.2, 97.27), (15.8, 97.27), (21.4, 97.27), (19.7, 97.27), (15.4, 97.27), (17.1, 97.27), (17.2, 97.27), (24.6, 97.27), (19.5, 97.27), (18.7, 97.27), (21.6, 97.27), (19.9, 97.27), (25.3, 97.27), (15.1, 97.28), (20, 97.28), (20.5, 97.28), (21, 97.28), (15.6, 97.28), (19.1, 97.28), (15.3, 97.28), (15.8, 97.28), (20.8, 97.28), (18.1, 97.28), (17.8, 97.28), (17.6, 97.28), (17.8, 97.28), (16.2, 97.28), (19.8, 97.28), (16.6, 97.28), (17, 97.28), (16.6, 97.28), (19.9, 97.28), (18, 97.28), (14.1, 97.28), (17.2, 97.28), (16.8, 97.28), (14.9, 97.28), (15.8, 97.28), (17.4, 97.28), (16.5, 97.28), (17.3, 97.28), (19.1, 97.29), (15.3, 97.29), (14.1, 97.29), (17.2, 97.29), (21.2, 98.61), (13.5, 99.13), (20.9, 99.2), (21.9, 99.29), (13.5, 99.36), (24.4, 99.42), (13, 99.42), (18.2, 99.43), (24.6, 99.43), (13.4, 99.43), (15.9, 99.43), (18.1, 99.43), (19.3, 99.43), (15.6, 99.43), (19.3, 99.44), (18.8, 99.44), (21.7, 99.44), (18.6, 99.44), (19.6, 99.44), (14.5, 99.44), (14.9, 99.44), (14.9, 99.44), (17.1, 99.44), (19.8, 99.44), (19.5, 99.44), (20.5, 99.44), (19, 99.44), (14.3, 99.44), (18.7, 99.45), (12, 99.45), (20, 99.45), (30.8, 0), (31.4, 0), (14.9, 0), (24, 0), (27.5, 0), (29.4, 0), (19.6, 0), (31.9, 0)]


	def roc_curve(d):
	output = {}
	# iterate over a range of values which represent the Ct thresholds you're
	# interested in testing
	for i in range(15, 40):
	below_threshold = [x for x in d if x[0] < i]
	above_threshold = [x for x in d if x[0] >= i]
	true_positives = len([x for x in below_threshold if x[1] >= 70])
	false_positives = len([x for x in below_threshold if x[1] < 70]) # fp is being below the threshold when you didn't produce good genome
	true_negatives = len([x for x in above_threshold if x[1] < 70])
	false_negatives = len([x for x in above_threshold if x[1] >= 70])
	tpr = true_positives / (true_positives + false_negatives)
	fpr = false_positives / (true_negatives + false_positives)
	output[i] = (tpr, fpr)
	# over_70 = len([x for x in d if x[1] >= 70 and x[0] < i])
	# under_70 = len([x for x in d if x[1] < 70 and x[0] < i])
	# output[i] = (under_70, over_70)
	for i in range(15, 40):
	print(i, output[i][0], output[i][1], sep = '\t')

	roc_curve(all_samples)