carolinecunningham/boxplots.py

## boxplots.py
####warning: only can be run locally & may depend on external code. this is just a place to store this

############### boxplot 1: APOBEC ####################
 apobec_ints = {}


 apobec = ["BLAC_apobec", "stomach_apobec", "breast_apobec", "cervix_apobec", "colonfin_apobec", "pancreas_apobec",
           "glioblast_apobec", "HNCarcinoma_apobec", "pancreatic_apobec", "liver_apobec", "lung_apobec", "glioma_apobec",
           "prostate_apobec", "skin_apobec", "thyroid_apobec"]


 cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver',
                 'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma']

 y_pos = np.arange(len(cancer_types))

 for ky in apobec:
     list_dir = []
     for val in cancer_enrich_dir[ky]:
         if len(val) > 0:
             list_dir.append(float(val))
     apobec_ints[ky] = list_dir
 print(apobec_ints)


 apobec_data = [apobec_ints["BLAC_apobec"], apobec_ints["breast_apobec"], apobec_ints["cervix_apobec"],
                apobec_ints["colonfin_apobec"], apobec_ints["glioblast_apobec"], apobec_ints["HNCarcinoma_apobec"],
                apobec_ints["pancreas_apobec"], apobec_ints["liver_apobec"], apobec_ints["lung_apobec"],
                apobec_ints["pancreatic_apobec"], apobec_ints["prostate_apobec"], apobec_ints["skin_apobec"],
                apobec_ints["stomach_apobec"], apobec_ints["thyroid_apobec"], apobec_ints["glioma_apobec"]]


 fig, ax = plt.subplots()
 testPlot = ax.boxplot(apobec_data, showfliers=False)

 plt.xticks(y_pos, cancer_types)

 ax.set_title("Graph 1. APOBEC Enrichment for Cancer Types", y=1.05)
 ax.set_ylabel("APOBEC Enrichment", labelpad=10)
 ax.set_xlabel("Cancer Type", labelpad=10)

 ax.legend()

 for tick in ax.get_xticklabels():
     tick.set_rotation(90)

 plt.xticks(np.arange(1, 16))
 plt.yticks(np.arange(0, 15, 1))

 plt.tight_layout()

 plt.savefig('apobec_boxplot_enrich.png', dpi = 500)


 ############### boxplot 2: UV ####################


 uv_ints = {}


 uv = ["BLAC_uv", "stomach_uv", "breast_uv", "cervix_uv", "colon_uv", "pancreas_uv",
       "glioblast_uv", "HNCarcinoma_uv", "pancreatic_uv", "liver_uv", "lung_uv", "glioma_uv",
        "prostate_uv", "skin_uv", "thyroid_uv"]


 cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver',
                 'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma']

 y_pos = np.arange(len(cancer_types))

 for ky in uv:
     list_dir = []
     for val in cancer_enrich_dir[ky]:
         if len(val) > 0 and val != ",":
             list_dir.append(float(val))
     uv_ints[ky] = list_dir


 uv_data = [uv_ints["BLAC_uv"], uv_ints["breast_uv"], uv_ints["cervix_uv"],
            uv_ints["colon_uv"], uv_ints["glioblast_uv"], uv_ints["HNCarcinoma_uv"],
            uv_ints["pancreas_uv"], uv_ints["liver_uv"], uv_ints["lung_uv"],
            uv_ints["pancreatic_uv"], uv_ints["prostate_uv"], uv_ints["skin_uv"],
            uv_ints["stomach_uv"], uv_ints["thyroid_uv"], uv_ints["glioma_uv"]]


 fig, ax = plt.subplots()
 testPlot = ax.boxplot(uv_data, showfliers=False)

 plt.xticks(y_pos, cancer_types)

 ax.set_title("Graph 2. UV Enrichment for Cancer Types", y=1.05)
 ax.set_ylabel("UV Enrichment", labelpad=10)
 ax.set_xlabel("Cancer Type", labelpad=10)

 ax.legend()

 for tick in ax.get_xticklabels():
     tick.set_rotation(90)

 plt.xticks(np.arange(1, 16))
 plt.yticks(np.arange(0, 3, 0.25))

 plt.tight_layout()

 plt.savefig('uv_boxplot_enrich.png', dpi = 500)


 ############### boxplot 4: aid ####################

 make_aid_boxplot()

 ############### boxplot 5: aging ####################

 make_ag_boxplot()
 make_ag_prop()

     cancer_sig_files = []

     for cancer_type in cancer_genomes_dirs:
         #cancer_sig_files.append((cancer_type + '_correct_apobec_sig.csv'))
         #cancer_sig_files.append((cancer_type + '_correct_tobacco_sig.csv'))
         #cancer_sig_files.append((cancer_type + '_correct_uv_sig.csv'))
         cancer_sig_files.append((cancer_type + '_correct_aid_sig.csv'))
         #cancer_sig_files.append((cancer_type + '_correct_ag_sig.csv'))


     for cancer_file in cancer_sig_files:
         with open(cancer_file, 'r') as csvfile:
             p_value_list = []
             reader = csv.reader(csvfile, delimiter=',');
             for row in reader:
                 #if "chi" in row[0]:
                    #p_value_list.append(1.)
                 #elif round(float(row[0]), 10) == 0:
                     #p_value_list.append(1E-10)

                 p_value_list.append(round(float(row[0]), 300))
             summary_p_val = stats.combine_pvalues(p_value_list)
             print(summary_p_val[1], cancer_file)


     for cancer_type in cancer_genomes_dirs:

         with open (cancer_type + '_correct_apobec_sig.csv', 'w') as csvfile:
             spamwriter = csv.writer(csvfile, delimiter= ' ',
                                 quotechar='|', quoting=csv.QUOTE_MINIMAL)
             p_val_list = []
             for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
                 if file_i == ".DS_Store":
                     continue
                 with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
                     mut = file.read()
                     _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                     observed = get_enrichment(mutations_with_context, "TCW", 1, "C", "K", 50)

                     if 1.0 >= observed[3] >= 0.0:
                         appender = float(observed[3])
                         spamwriter.writerow([appender])
                     else:
                         if 1.0 >= observed[2] >= 0.0:
                             appender = float(observed[2])
                             spamwriter.writerow([appender])

             for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
                 if file_i == ".DS_Store":
                     continue
                 with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
                     mut = file.read()
                     _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                     observed = get_enrichment(mutations_with_context, "TCW", 1, "C", "K", 50)
                     appender = [observed[1:2]]
                     spamwriter.writerows(appender)


         with open (cancer_type + '_correct_ag_sig.csv', 'w') as csvfile:
                 spamwriter = csv.writer(csvfile, delimiter= ' ',
                                     quotechar='|', quoting=csv.QUOTE_MINIMAL)
                 for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
                     if file_i == ".DS_Store":
                         continue
                     with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
                         mut = file.read()
                         _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                         observed = get_enrichment(mutations_with_context, "CG", 0, "C", "T", 50)
                         if 1.0 >= observed[3] >= 0.0:
                             appender = float(observed[3])
                             spamwriter.writerow([appender])
                         else:
                             if 1.0 >= observed[2] >= 0.0:
                                 appender = float(observed[2])
                                 spamwriter.writerow([appender])


                 for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
                     if file_i == ".DS_Store":
                         continue
                     with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
                         mut = file.read()
                         _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                         observed = get_enrichment(mutations_with_context, "CG", 0, "C", "T", 50)
                         appender = [observed[1:2]]
                         spamwriter.writerows(appender)


         with open(cancer_type + '_correct_uv_sig.csv', 'w') as csvfile:
             spamwriter = csv.writer(csvfile, delimiter=' ',
                                     quotechar='|', quoting=csv.QUOTE_MINIMAL)
             for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
                 if file_i == ".DS_Store":
                     continue
                 with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
                     mut = file.read()
                     _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                     observed = get_enrichment(mutations_with_context, "YC", 1, "C", "T", 50)
                     if 1.0 >= observed[3] >= 0.0:
                         appender = float(observed[3])
                         spamwriter.writerow([appender])
                     else:
                         if 1.0 >= observed[2] >= 0.0:
                             appender = float(observed[2])
                             spamwriter.writerow([appender])
                     #logo = create_logo(mutations_with_context, "TCW", 1, "C", "K", 50)


             for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
                 if file_i == ".DS_Store":
                     continue
                 with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
                     mut = file.read()
                     _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                     observed = get_enrichment(mutations_with_context, "YC", 1, "C", "T", 50)
                     appender = [observed[1:2]]
                     spamwriter.writerows(appender)


         with open(cancer_type + '_correct_aid_sig.csv', 'w') as csvfile:

             spamwriter = csv.writer(csvfile, delimiter=' ',
                                     quotechar='|', quoting=csv.QUOTE_MINIMAL)
             for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
                 if file_i == ".DS_Store":
                     continue
                 with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
                     mut = file.read()
                     _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                     observed = get_enrichment(mutations_with_context, "NTW", 1, "T", "S", 50)
                     if 1.0 >= observed[3] >= 0.0:
                         appender = float(observed[3])
                         spamwriter.writerow([appender])
                     else:
                         if 1.0 >= observed[2] >= 0.0:
                             appender = float(observed[2])
                             spamwriter.writerow([appender])


             for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
                 if file_i == ".DS_Store":
                     continue
                 with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
                     mut = file.read()
                     _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                     observed = get_enrichment(mutations_with_context, "NTW", 1, "T", "S", 50)
                     appender = [observed[1:2]]
                     spamwriter.writerows(appender)


         with open(cancer_type + '_correct_tobacco_sig.csv', 'w') as csvfile:
             spamwriter = csv.writer(csvfile, delimiter=' ',
                                     quotechar='|', quoting=csv.QUOTE_MINIMAL)
             for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
                 if file_i == ".DS_Store" or file_i == "Manifest.txt":
                     continue
                 with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
                     mut = file.read()
                     _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                     observed = get_enrichment(mutations_with_context, "CG", 0, "C", "R", 50)
                     if 1.0 >= observed[3] >= 0.0:
                         appender = float(observed[3])
                     else:
                         if 1.0 >= observed[2] >= 0.0:
                             appender = float(observed[2])
                     spamwriter.writerow([appender])


             for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
                 if file_i == ".DS_Store" or file_i == "Manifest.txt":
                     continue
                 with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
                     mut = file.read()
                     _, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
                     observed = get_enrichment(mutations_with_context, "CG", 0, "C", "R", 50)
                     appender = [observed[1:2]]
                     spamwriter.writerows(appender)


## graphs.py
##bargraphs code for GOOGLE SF

import matplotlib as mpl
mpl.use('TkAgg')

import matplotlib.pyplot as plt
plt.style.use('seaborn-ticks')

import numpy as np
import math
import pylab


#TO-DO: make graphs of enrichment for different cancer types
#remove leukemia from analysis
#make table with significance for different p-values, use python to do this

#order of data

#bladder, breast, cervical
#colon adenocarcinoma, glioblastoma multiforme, head and neck squamous cell carcinoma, pancreatic cancer
#liver hepatocellular carcinoma, lung cancer, pan-kidney, prostate cancer, skin cutaneous melanoma, stomach adenocarcinoma, thyroid carcinoma, & glioma

data_counts = {"bladder":130, "breast":983, "cervical":195, "colon adenocarcinoma":155, "glioblastoma multiforme":291,
               "head and neck squamous cell carcinoma":280, "pancreatic cancer":151, "liver hepatocellular carcinoma":199,
               "lung cancer":179, "pan-kidney":645, "prostate cancer":333, "skin cutaneous melanoma":346, "stomach adenocarcinoma":290,
               "thyroid carcinoma":406, "glioma":577}

#name = "APOBEC"
#num = "1"
#data = [5.56, 2.248, 4.842, 0.4954, 0.4485, 2.535, 0.5659, 0.8392, 2.165, 0.9112, 1.1833, 1.248, 0.6218, 1.219, 0.4618]
#error = [2.9657, 2.7965, 3.523, 0.3219, 0.4156, 2.196, 0.6097, 0.416, 1.7013, 0.6839, 1.2873, 0.378, 0.5518, 2.0621, 0.4598]
#confidence = [0.51, 0.175, 0.494, 0.0507, 0.0478, 0.257, 0.0972, 0.0578, 0.249, 0.0529, 0.138, 0.0398, 0.0635, 0.201, 0.168]

#name = "UV"
#num = "2"
#data = [1.329, 0.9279, 1.19, 0.3407, 0.7913, 0.992, 0.5491, 0.7873, 0.9906, 0.7446, 0.7914, 1.556, 0.6514, 0.7334, 0.5506]
#error = [0.3155, 0.4582, 0.4478, 0.4423, .1269, 0.3195, 0.1996, 0.7873, 0.2789, 0.3155, 0.4416, 0.3491, 0.1855, 0.5577, 0.224]
#confidence = [0.0542, 0.0286, 0.0629, 0.0696, 0.0146, 0.0374, 0.0318, 0.109, 0.0409, 0.0243, 0.0474, 0.0368, 0.0213, 0.0542, 0.0183]

#name = "Tobacco"
#num = "3"
#data = [1.2023, 2.5748, 1.735, 0.4773, 2.0803, 2.1615, 2.8028, 3.43, 3.4835, 2.5836, 1.4285, 4.0497, 1.9877, 3.3275, 2.2154]
#error = [0.3724, 3.0696, 1.4121, 1.2318, 2.043, 2.1504, 3.8017, 2.1, 1.237, 2.1173, 2.488, 3.1189, 1.8416, 5.7224, 3.0512]
#confidence = [0.064, 0.192, 0.198, 0.194, 0.234, 0.252, 0.606, 0.292, 0.181, 0.163, 0.267, 0.329, 0.212, 0.557, 0.249]


#name = "AID"
#num = "4"
#data = [0.8035, 1.367, 1.4454, 0.4378, 1.4918, 1.6874, 1.8327, 1.1884, 1.3214, 1.3699, 1.6722, 1.8702, 2.0087, 1.1954, 1.4724]
#error = [0.7164, 1.4536, 1.5265, 0.4906, 1.2961, 1.2349, 1.5868, 0.6354, 0.5401, 1.0304, 1.6324, 0.9295, 1.2839, 1.5418,1.4426]
#confidence = [0.123, 0.0448, 0.214, 0.0772, 0.149, 0.145, 0.253, 0.0883, 0.0791, 0.0795, 0.175, 0.0979, 0.148, 0.15, 0.118]

counts = {"bladder":130, "breast":983, "cervical":195, "colon adenocarcinoma":155, "glioblastoma multiforme":291,
               "head and neck squamous cell carcinoma":280, "pancreatic cancer":151, "liver hepatocellular carcinoma":199,
               "lung cancer":179, "pan-kidney":645, "prostate cancer":333, "skin cutaneous melanoma":346, "stomach adenocarcinoma":290,
               "thyroid carcinoma":406, "glioma":577}

name = "Aging"
num = "5"
data = [6.2851, 10.3227, 8.2077, 0.913, 16.4029, 9.0951, 14.8916, 7.0081, 6.349, 6.8536, 6.3962, 4.8882, 14.8384, 9.5127, 17.5606]
error = [2.4605, 6.126, 4.0094, 1.1901, 4.8837, 3.4547, 4.1749, 3.2267, 1.9739, 3.6531, 5.687, 3.3729, 4.3531, 8.4229, 8.1927]
confidence = [0.423, 0.383, 0.563, 0.187, 0.561, 0.405, 0.666, 0.448, 0.289, 0.282, 0.61, 0.355, 0.501, 0.819, 0.668]

cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver',
                'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma']

y_pos = np.arange(len(cancer_types))

fig, ax = plt.subplots()

plt.size = (1200, 1200)

bar = ax.bar(cancer_types, data, align="center", yerr=confidence, alpha = 0.5)
ax.set_xticks(y_pos, cancer_types)

ax.set_title("Graph " + num + "." + " " + name + " Enrichment for Cancer Types", y=1.05)
ax.set_ylabel(name + " Enrichment", labelpad=10)
ax.set_xlabel("Cancer Type",labelpad=10)

threshold = 1.0

above_threshold = min(data)
below_threshold = max(data)

############### APOBEC GRAPH SETTINGS ##############

#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "7 out of 15 Cancers Had Significant " + name + " Enrichment")
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
#plt.yticks(np.arange(-1, 7, 1))

############### APOBEC GRAPH SETTINGS ##############

############### UV GRAPH SETTINGS ##############

#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "3 out of 15 Cancers Had Significant " + name + " Enrichment")
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
#plt.yticks(np.arange(0, 2.5, 0.5))

############### UV GRAPH SETTINGS ##############

############### Tobacco GRAPH SETTINGS ##############

#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "14 out of 15 Cancers Had Significant " + name + " Enrichment")
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
#plt.yticks(np.arange(0, 6, 0.5))

############### Tobacco GRAPH SETTINGS ##############

############### AID GRAPH SETTINGS ##############

#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "13 out of 15 Cancers Had Significant " + name + " Enrichment")
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
#plt.yticks(np.arange(0, 3.5, 0.5))

############### AID GRAPH SETTINGS ##############


############### Aging GRAPH SETTINGS ##############

ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "14 out of 15 Cancers Had Significant " + name + " Enrichment")
ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
plt.yticks(np.arange(0, 24, 2))

############### Aging GRAPH SETTINGS ##############

ax.legend()


for tick in ax.get_xticklabels():
    tick.set_rotation(90)

plt.tight_layout()

ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')


plt.savefig(name + '_enrich.png', dpi = 500)
plt.show()
	####warning: only can be run locally & may depend on external code. this is just a place to store this

	############### boxplot 1: APOBEC ####################
	apobec_ints = {}


	apobec = ["BLAC_apobec", "stomach_apobec", "breast_apobec", "cervix_apobec", "colonfin_apobec", "pancreas_apobec",
	"glioblast_apobec", "HNCarcinoma_apobec", "pancreatic_apobec", "liver_apobec", "lung_apobec", "glioma_apobec",
	"prostate_apobec", "skin_apobec", "thyroid_apobec"]


	cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver',
	'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma']

	y_pos = np.arange(len(cancer_types))

	for ky in apobec:
	list_dir = []
	for val in cancer_enrich_dir[ky]:
	if len(val) > 0:
	list_dir.append(float(val))
	apobec_ints[ky] = list_dir
	print(apobec_ints)


	apobec_data = [apobec_ints["BLAC_apobec"], apobec_ints["breast_apobec"], apobec_ints["cervix_apobec"],
	apobec_ints["colonfin_apobec"], apobec_ints["glioblast_apobec"], apobec_ints["HNCarcinoma_apobec"],
	apobec_ints["pancreas_apobec"], apobec_ints["liver_apobec"], apobec_ints["lung_apobec"],
	apobec_ints["pancreatic_apobec"], apobec_ints["prostate_apobec"], apobec_ints["skin_apobec"],
	apobec_ints["stomach_apobec"], apobec_ints["thyroid_apobec"], apobec_ints["glioma_apobec"]]


	fig, ax = plt.subplots()
	testPlot = ax.boxplot(apobec_data, showfliers=False)

	plt.xticks(y_pos, cancer_types)

	ax.set_title("Graph 1. APOBEC Enrichment for Cancer Types", y=1.05)
	ax.set_ylabel("APOBEC Enrichment", labelpad=10)
	ax.set_xlabel("Cancer Type", labelpad=10)

	ax.legend()

	for tick in ax.get_xticklabels():
	tick.set_rotation(90)

	plt.xticks(np.arange(1, 16))
	plt.yticks(np.arange(0, 15, 1))

	plt.tight_layout()

	plt.savefig('apobec_boxplot_enrich.png', dpi = 500)


	############### boxplot 2: UV ####################


	uv_ints = {}


	uv = ["BLAC_uv", "stomach_uv", "breast_uv", "cervix_uv", "colon_uv", "pancreas_uv",
	"glioblast_uv", "HNCarcinoma_uv", "pancreatic_uv", "liver_uv", "lung_uv", "glioma_uv",
	"prostate_uv", "skin_uv", "thyroid_uv"]


	cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver',
	'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma']

	y_pos = np.arange(len(cancer_types))

	for ky in uv:
	list_dir = []
	for val in cancer_enrich_dir[ky]:
	if len(val) > 0 and val != ",":
	list_dir.append(float(val))
	uv_ints[ky] = list_dir


	uv_data = [uv_ints["BLAC_uv"], uv_ints["breast_uv"], uv_ints["cervix_uv"],
	uv_ints["colon_uv"], uv_ints["glioblast_uv"], uv_ints["HNCarcinoma_uv"],
	uv_ints["pancreas_uv"], uv_ints["liver_uv"], uv_ints["lung_uv"],
	uv_ints["pancreatic_uv"], uv_ints["prostate_uv"], uv_ints["skin_uv"],
	uv_ints["stomach_uv"], uv_ints["thyroid_uv"], uv_ints["glioma_uv"]]


	fig, ax = plt.subplots()
	testPlot = ax.boxplot(uv_data, showfliers=False)

	plt.xticks(y_pos, cancer_types)

	ax.set_title("Graph 2. UV Enrichment for Cancer Types", y=1.05)
	ax.set_ylabel("UV Enrichment", labelpad=10)
	ax.set_xlabel("Cancer Type", labelpad=10)

	ax.legend()

	for tick in ax.get_xticklabels():
	tick.set_rotation(90)

	plt.xticks(np.arange(1, 16))
	plt.yticks(np.arange(0, 3, 0.25))

	plt.tight_layout()

	plt.savefig('uv_boxplot_enrich.png', dpi = 500)



	############### boxplot 4: aid ####################

	make_aid_boxplot()

	############### boxplot 5: aging ####################

	make_ag_boxplot()
	make_ag_prop()

	cancer_sig_files = []

	for cancer_type in cancer_genomes_dirs:
	#cancer_sig_files.append((cancer_type + '_correct_apobec_sig.csv'))
	#cancer_sig_files.append((cancer_type + '_correct_tobacco_sig.csv'))
	#cancer_sig_files.append((cancer_type + '_correct_uv_sig.csv'))
	cancer_sig_files.append((cancer_type + '_correct_aid_sig.csv'))
	#cancer_sig_files.append((cancer_type + '_correct_ag_sig.csv'))


	for cancer_file in cancer_sig_files:
	with open(cancer_file, 'r') as csvfile:
	p_value_list = []
	reader = csv.reader(csvfile, delimiter=',');
	for row in reader:
	#if "chi" in row[0]:
	#p_value_list.append(1.)
	#elif round(float(row[0]), 10) == 0:
	#p_value_list.append(1E-10)

	p_value_list.append(round(float(row[0]), 300))
	summary_p_val = stats.combine_pvalues(p_value_list)
	print(summary_p_val[1], cancer_file)


	for cancer_type in cancer_genomes_dirs:

	with open (cancer_type + '_correct_apobec_sig.csv', 'w') as csvfile:
	spamwriter = csv.writer(csvfile, delimiter= ' ',
	quotechar='\|', quoting=csv.QUOTE_MINIMAL)
	p_val_list = []
	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
	if file_i == ".DS_Store":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "TCW", 1, "C", "K", 50)

	if 1.0 >= observed[3] >= 0.0:
	appender = float(observed[3])
	spamwriter.writerow([appender])
	else:
	if 1.0 >= observed[2] >= 0.0:
	appender = float(observed[2])
	spamwriter.writerow([appender])

	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
	if file_i == ".DS_Store":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "TCW", 1, "C", "K", 50)
	appender = [observed[1:2]]
	spamwriter.writerows(appender)


	with open (cancer_type + '_correct_ag_sig.csv', 'w') as csvfile:
	spamwriter = csv.writer(csvfile, delimiter= ' ',
	quotechar='\|', quoting=csv.QUOTE_MINIMAL)
	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
	if file_i == ".DS_Store":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "CG", 0, "C", "T", 50)
	if 1.0 >= observed[3] >= 0.0:
	appender = float(observed[3])
	spamwriter.writerow([appender])
	else:
	if 1.0 >= observed[2] >= 0.0:
	appender = float(observed[2])
	spamwriter.writerow([appender])


	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
	if file_i == ".DS_Store":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "CG", 0, "C", "T", 50)
	appender = [observed[1:2]]
	spamwriter.writerows(appender)



	with open(cancer_type + '_correct_uv_sig.csv', 'w') as csvfile:
	spamwriter = csv.writer(csvfile, delimiter=' ',
	quotechar='\|', quoting=csv.QUOTE_MINIMAL)
	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
	if file_i == ".DS_Store":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "YC", 1, "C", "T", 50)
	if 1.0 >= observed[3] >= 0.0:
	appender = float(observed[3])
	spamwriter.writerow([appender])
	else:
	if 1.0 >= observed[2] >= 0.0:
	appender = float(observed[2])
	spamwriter.writerow([appender])
	#logo = create_logo(mutations_with_context, "TCW", 1, "C", "K", 50)


	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
	if file_i == ".DS_Store":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "YC", 1, "C", "T", 50)
	appender = [observed[1:2]]
	spamwriter.writerows(appender)


	with open(cancer_type + '_correct_aid_sig.csv', 'w') as csvfile:

	spamwriter = csv.writer(csvfile, delimiter=' ',
	quotechar='\|', quoting=csv.QUOTE_MINIMAL)
	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
	if file_i == ".DS_Store":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "NTW", 1, "T", "S", 50)
	if 1.0 >= observed[3] >= 0.0:
	appender = float(observed[3])
	spamwriter.writerow([appender])
	else:
	if 1.0 >= observed[2] >= 0.0:
	appender = float(observed[2])
	spamwriter.writerow([appender])


	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
	if file_i == ".DS_Store":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "NTW", 1, "T", "S", 50)
	appender = [observed[1:2]]
	spamwriter.writerows(appender)


	with open(cancer_type + '_correct_tobacco_sig.csv', 'w') as csvfile:
	spamwriter = csv.writer(csvfile, delimiter=' ',
	quotechar='\|', quoting=csv.QUOTE_MINIMAL)
	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
	if file_i == ".DS_Store" or file_i == "Manifest.txt":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "CG", 0, "C", "R", 50)
	if 1.0 >= observed[3] >= 0.0:
	appender = float(observed[3])
	else:
	if 1.0 >= observed[2] >= 0.0:
	appender = float(observed[2])
	spamwriter.writerow([appender])


	for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
	if file_i == ".DS_Store" or file_i == "Manifest.txt":
	continue
	with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
	mut = file.read()
	_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
	observed = get_enrichment(mutations_with_context, "CG", 0, "C", "R", 50)
	appender = [observed[1:2]]
	spamwriter.writerows(appender)
	##bargraphs code for GOOGLE SF

	import matplotlib as mpl
	mpl.use('TkAgg')

	import matplotlib.pyplot as plt
	plt.style.use('seaborn-ticks')

	import numpy as np
	import math
	import pylab


	#TO-DO: make graphs of enrichment for different cancer types
	#remove leukemia from analysis
	#make table with significance for different p-values, use python to do this

	#order of data

	#bladder, breast, cervical
	#colon adenocarcinoma, glioblastoma multiforme, head and neck squamous cell carcinoma, pancreatic cancer
	#liver hepatocellular carcinoma, lung cancer, pan-kidney, prostate cancer, skin cutaneous melanoma, stomach adenocarcinoma, thyroid carcinoma, & glioma

	data_counts = {"bladder":130, "breast":983, "cervical":195, "colon adenocarcinoma":155, "glioblastoma multiforme":291,
	"head and neck squamous cell carcinoma":280, "pancreatic cancer":151, "liver hepatocellular carcinoma":199,
	"lung cancer":179, "pan-kidney":645, "prostate cancer":333, "skin cutaneous melanoma":346, "stomach adenocarcinoma":290,
	"thyroid carcinoma":406, "glioma":577}

	#name = "APOBEC"
	#num = "1"
	#data = [5.56, 2.248, 4.842, 0.4954, 0.4485, 2.535, 0.5659, 0.8392, 2.165, 0.9112, 1.1833, 1.248, 0.6218, 1.219, 0.4618]
	#error = [2.9657, 2.7965, 3.523, 0.3219, 0.4156, 2.196, 0.6097, 0.416, 1.7013, 0.6839, 1.2873, 0.378, 0.5518, 2.0621, 0.4598]
	#confidence = [0.51, 0.175, 0.494, 0.0507, 0.0478, 0.257, 0.0972, 0.0578, 0.249, 0.0529, 0.138, 0.0398, 0.0635, 0.201, 0.168]

	#name = "UV"
	#num = "2"
	#data = [1.329, 0.9279, 1.19, 0.3407, 0.7913, 0.992, 0.5491, 0.7873, 0.9906, 0.7446, 0.7914, 1.556, 0.6514, 0.7334, 0.5506]
	#error = [0.3155, 0.4582, 0.4478, 0.4423, .1269, 0.3195, 0.1996, 0.7873, 0.2789, 0.3155, 0.4416, 0.3491, 0.1855, 0.5577, 0.224]
	#confidence = [0.0542, 0.0286, 0.0629, 0.0696, 0.0146, 0.0374, 0.0318, 0.109, 0.0409, 0.0243, 0.0474, 0.0368, 0.0213, 0.0542, 0.0183]

	#name = "Tobacco"
	#num = "3"
	#data = [1.2023, 2.5748, 1.735, 0.4773, 2.0803, 2.1615, 2.8028, 3.43, 3.4835, 2.5836, 1.4285, 4.0497, 1.9877, 3.3275, 2.2154]
	#error = [0.3724, 3.0696, 1.4121, 1.2318, 2.043, 2.1504, 3.8017, 2.1, 1.237, 2.1173, 2.488, 3.1189, 1.8416, 5.7224, 3.0512]
	#confidence = [0.064, 0.192, 0.198, 0.194, 0.234, 0.252, 0.606, 0.292, 0.181, 0.163, 0.267, 0.329, 0.212, 0.557, 0.249]


	#name = "AID"
	#num = "4"
	#data = [0.8035, 1.367, 1.4454, 0.4378, 1.4918, 1.6874, 1.8327, 1.1884, 1.3214, 1.3699, 1.6722, 1.8702, 2.0087, 1.1954, 1.4724]
	#error = [0.7164, 1.4536, 1.5265, 0.4906, 1.2961, 1.2349, 1.5868, 0.6354, 0.5401, 1.0304, 1.6324, 0.9295, 1.2839, 1.5418,1.4426]
	#confidence = [0.123, 0.0448, 0.214, 0.0772, 0.149, 0.145, 0.253, 0.0883, 0.0791, 0.0795, 0.175, 0.0979, 0.148, 0.15, 0.118]

	counts = {"bladder":130, "breast":983, "cervical":195, "colon adenocarcinoma":155, "glioblastoma multiforme":291,
	"head and neck squamous cell carcinoma":280, "pancreatic cancer":151, "liver hepatocellular carcinoma":199,
	"lung cancer":179, "pan-kidney":645, "prostate cancer":333, "skin cutaneous melanoma":346, "stomach adenocarcinoma":290,
	"thyroid carcinoma":406, "glioma":577}

	name = "Aging"
	num = "5"
	data = [6.2851, 10.3227, 8.2077, 0.913, 16.4029, 9.0951, 14.8916, 7.0081, 6.349, 6.8536, 6.3962, 4.8882, 14.8384, 9.5127, 17.5606]
	error = [2.4605, 6.126, 4.0094, 1.1901, 4.8837, 3.4547, 4.1749, 3.2267, 1.9739, 3.6531, 5.687, 3.3729, 4.3531, 8.4229, 8.1927]
	confidence = [0.423, 0.383, 0.563, 0.187, 0.561, 0.405, 0.666, 0.448, 0.289, 0.282, 0.61, 0.355, 0.501, 0.819, 0.668]

	cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver',
	'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma']

	y_pos = np.arange(len(cancer_types))

	fig, ax = plt.subplots()

	plt.size = (1200, 1200)

	bar = ax.bar(cancer_types, data, align="center", yerr=confidence, alpha = 0.5)
	ax.set_xticks(y_pos, cancer_types)

	ax.set_title("Graph " + num + "." + " " + name + " Enrichment for Cancer Types", y=1.05)
	ax.set_ylabel(name + " Enrichment", labelpad=10)
	ax.set_xlabel("Cancer Type",labelpad=10)

	threshold = 1.0

	above_threshold = min(data)
	below_threshold = max(data)

	############### APOBEC GRAPH SETTINGS ##############

	#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "7 out of 15 Cancers Had Significant " + name + " Enrichment")
	#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
	#plt.yticks(np.arange(-1, 7, 1))

	############### APOBEC GRAPH SETTINGS ##############

	############### UV GRAPH SETTINGS ##############

	#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "3 out of 15 Cancers Had Significant " + name + " Enrichment")
	#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
	#plt.yticks(np.arange(0, 2.5, 0.5))

	############### UV GRAPH SETTINGS ##############

	############### Tobacco GRAPH SETTINGS ##############

	#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "14 out of 15 Cancers Had Significant " + name + " Enrichment")
	#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
	#plt.yticks(np.arange(0, 6, 0.5))

	############### Tobacco GRAPH SETTINGS ##############

	############### AID GRAPH SETTINGS ##############

	#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "13 out of 15 Cancers Had Significant " + name + " Enrichment")
	#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
	#plt.yticks(np.arange(0, 3.5, 0.5))

	############### AID GRAPH SETTINGS ##############


	############### Aging GRAPH SETTINGS ##############

	ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "14 out of 15 Cancers Had Significant " + name + " Enrichment")
	ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
	plt.yticks(np.arange(0, 24, 2))

	############### Aging GRAPH SETTINGS ##############

	ax.legend()


	for tick in ax.get_xticklabels():
	tick.set_rotation(90)

	plt.tight_layout()

	ax.yaxis.set_ticks_position('left')
	ax.xaxis.set_ticks_position('bottom')


	plt.savefig(name + '_enrich.png', dpi = 500)
	plt.show()