Last active
September 19, 2020 04:28
-
-
Save carolinecunningham/33128642dcb78aae0b67453ba5b2f6f8 to your computer and use it in GitHub Desktop.
create graphs for motifs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####warning: only can be run locally & may depend on external code. this is just a place to store this | |
############### boxplot 1: APOBEC #################### | |
apobec_ints = {} | |
apobec = ["BLAC_apobec", "stomach_apobec", "breast_apobec", "cervix_apobec", "colonfin_apobec", "pancreas_apobec", | |
"glioblast_apobec", "HNCarcinoma_apobec", "pancreatic_apobec", "liver_apobec", "lung_apobec", "glioma_apobec", | |
"prostate_apobec", "skin_apobec", "thyroid_apobec"] | |
cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver', | |
'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma'] | |
y_pos = np.arange(len(cancer_types)) | |
for ky in apobec: | |
list_dir = [] | |
for val in cancer_enrich_dir[ky]: | |
if len(val) > 0: | |
list_dir.append(float(val)) | |
apobec_ints[ky] = list_dir | |
print(apobec_ints) | |
apobec_data = [apobec_ints["BLAC_apobec"], apobec_ints["breast_apobec"], apobec_ints["cervix_apobec"], | |
apobec_ints["colonfin_apobec"], apobec_ints["glioblast_apobec"], apobec_ints["HNCarcinoma_apobec"], | |
apobec_ints["pancreas_apobec"], apobec_ints["liver_apobec"], apobec_ints["lung_apobec"], | |
apobec_ints["pancreatic_apobec"], apobec_ints["prostate_apobec"], apobec_ints["skin_apobec"], | |
apobec_ints["stomach_apobec"], apobec_ints["thyroid_apobec"], apobec_ints["glioma_apobec"]] | |
fig, ax = plt.subplots() | |
testPlot = ax.boxplot(apobec_data, showfliers=False) | |
plt.xticks(y_pos, cancer_types) | |
ax.set_title("Graph 1. APOBEC Enrichment for Cancer Types", y=1.05) | |
ax.set_ylabel("APOBEC Enrichment", labelpad=10) | |
ax.set_xlabel("Cancer Type", labelpad=10) | |
ax.legend() | |
for tick in ax.get_xticklabels(): | |
tick.set_rotation(90) | |
plt.xticks(np.arange(1, 16)) | |
plt.yticks(np.arange(0, 15, 1)) | |
plt.tight_layout() | |
plt.savefig('apobec_boxplot_enrich.png', dpi = 500) | |
############### boxplot 2: UV #################### | |
uv_ints = {} | |
uv = ["BLAC_uv", "stomach_uv", "breast_uv", "cervix_uv", "colon_uv", "pancreas_uv", | |
"glioblast_uv", "HNCarcinoma_uv", "pancreatic_uv", "liver_uv", "lung_uv", "glioma_uv", | |
"prostate_uv", "skin_uv", "thyroid_uv"] | |
cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver', | |
'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma'] | |
y_pos = np.arange(len(cancer_types)) | |
for ky in uv: | |
list_dir = [] | |
for val in cancer_enrich_dir[ky]: | |
if len(val) > 0 and val != ",": | |
list_dir.append(float(val)) | |
uv_ints[ky] = list_dir | |
uv_data = [uv_ints["BLAC_uv"], uv_ints["breast_uv"], uv_ints["cervix_uv"], | |
uv_ints["colon_uv"], uv_ints["glioblast_uv"], uv_ints["HNCarcinoma_uv"], | |
uv_ints["pancreas_uv"], uv_ints["liver_uv"], uv_ints["lung_uv"], | |
uv_ints["pancreatic_uv"], uv_ints["prostate_uv"], uv_ints["skin_uv"], | |
uv_ints["stomach_uv"], uv_ints["thyroid_uv"], uv_ints["glioma_uv"]] | |
fig, ax = plt.subplots() | |
testPlot = ax.boxplot(uv_data, showfliers=False) | |
plt.xticks(y_pos, cancer_types) | |
ax.set_title("Graph 2. UV Enrichment for Cancer Types", y=1.05) | |
ax.set_ylabel("UV Enrichment", labelpad=10) | |
ax.set_xlabel("Cancer Type", labelpad=10) | |
ax.legend() | |
for tick in ax.get_xticklabels(): | |
tick.set_rotation(90) | |
plt.xticks(np.arange(1, 16)) | |
plt.yticks(np.arange(0, 3, 0.25)) | |
plt.tight_layout() | |
plt.savefig('uv_boxplot_enrich.png', dpi = 500) | |
############### boxplot 4: aid #################### | |
make_aid_boxplot() | |
############### boxplot 5: aging #################### | |
make_ag_boxplot() | |
make_ag_prop() | |
cancer_sig_files = [] | |
for cancer_type in cancer_genomes_dirs: | |
#cancer_sig_files.append((cancer_type + '_correct_apobec_sig.csv')) | |
#cancer_sig_files.append((cancer_type + '_correct_tobacco_sig.csv')) | |
#cancer_sig_files.append((cancer_type + '_correct_uv_sig.csv')) | |
cancer_sig_files.append((cancer_type + '_correct_aid_sig.csv')) | |
#cancer_sig_files.append((cancer_type + '_correct_ag_sig.csv')) | |
for cancer_file in cancer_sig_files: | |
with open(cancer_file, 'r') as csvfile: | |
p_value_list = [] | |
reader = csv.reader(csvfile, delimiter=','); | |
for row in reader: | |
#if "chi" in row[0]: | |
#p_value_list.append(1.) | |
#elif round(float(row[0]), 10) == 0: | |
#p_value_list.append(1E-10) | |
p_value_list.append(round(float(row[0]), 300)) | |
summary_p_val = stats.combine_pvalues(p_value_list) | |
print(summary_p_val[1], cancer_file) | |
for cancer_type in cancer_genomes_dirs: | |
with open (cancer_type + '_correct_apobec_sig.csv', 'w') as csvfile: | |
spamwriter = csv.writer(csvfile, delimiter= ' ', | |
quotechar='|', quoting=csv.QUOTE_MINIMAL) | |
p_val_list = [] | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type): | |
if file_i == ".DS_Store": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "TCW", 1, "C", "K", 50) | |
if 1.0 >= observed[3] >= 0.0: | |
appender = float(observed[3]) | |
spamwriter.writerow([appender]) | |
else: | |
if 1.0 >= observed[2] >= 0.0: | |
appender = float(observed[2]) | |
spamwriter.writerow([appender]) | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"): | |
if file_i == ".DS_Store": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "TCW", 1, "C", "K", 50) | |
appender = [observed[1:2]] | |
spamwriter.writerows(appender) | |
with open (cancer_type + '_correct_ag_sig.csv', 'w') as csvfile: | |
spamwriter = csv.writer(csvfile, delimiter= ' ', | |
quotechar='|', quoting=csv.QUOTE_MINIMAL) | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type): | |
if file_i == ".DS_Store": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "CG", 0, "C", "T", 50) | |
if 1.0 >= observed[3] >= 0.0: | |
appender = float(observed[3]) | |
spamwriter.writerow([appender]) | |
else: | |
if 1.0 >= observed[2] >= 0.0: | |
appender = float(observed[2]) | |
spamwriter.writerow([appender]) | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type): | |
if file_i == ".DS_Store": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "CG", 0, "C", "T", 50) | |
appender = [observed[1:2]] | |
spamwriter.writerows(appender) | |
with open(cancer_type + '_correct_uv_sig.csv', 'w') as csvfile: | |
spamwriter = csv.writer(csvfile, delimiter=' ', | |
quotechar='|', quoting=csv.QUOTE_MINIMAL) | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type): | |
if file_i == ".DS_Store": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "YC", 1, "C", "T", 50) | |
if 1.0 >= observed[3] >= 0.0: | |
appender = float(observed[3]) | |
spamwriter.writerow([appender]) | |
else: | |
if 1.0 >= observed[2] >= 0.0: | |
appender = float(observed[2]) | |
spamwriter.writerow([appender]) | |
#logo = create_logo(mutations_with_context, "TCW", 1, "C", "K", 50) | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"): | |
if file_i == ".DS_Store": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "YC", 1, "C", "T", 50) | |
appender = [observed[1:2]] | |
spamwriter.writerows(appender) | |
with open(cancer_type + '_correct_aid_sig.csv', 'w') as csvfile: | |
spamwriter = csv.writer(csvfile, delimiter=' ', | |
quotechar='|', quoting=csv.QUOTE_MINIMAL) | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type): | |
if file_i == ".DS_Store": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "NTW", 1, "T", "S", 50) | |
if 1.0 >= observed[3] >= 0.0: | |
appender = float(observed[3]) | |
spamwriter.writerow([appender]) | |
else: | |
if 1.0 >= observed[2] >= 0.0: | |
appender = float(observed[2]) | |
spamwriter.writerow([appender]) | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"): | |
if file_i == ".DS_Store": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "NTW", 1, "T", "S", 50) | |
appender = [observed[1:2]] | |
spamwriter.writerows(appender) | |
with open(cancer_type + '_correct_tobacco_sig.csv', 'w') as csvfile: | |
spamwriter = csv.writer(csvfile, delimiter=' ', | |
quotechar='|', quoting=csv.QUOTE_MINIMAL) | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type): | |
if file_i == ".DS_Store" or file_i == "Manifest.txt": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "CG", 0, "C", "R", 50) | |
if 1.0 >= observed[3] >= 0.0: | |
appender = float(observed[3]) | |
else: | |
if 1.0 >= observed[2] >= 0.0: | |
appender = float(observed[2]) | |
spamwriter.writerow([appender]) | |
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"): | |
if file_i == ".DS_Store" or file_i == "Manifest.txt": | |
continue | |
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file: | |
mut = file.read() | |
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19) | |
observed = get_enrichment(mutations_with_context, "CG", 0, "C", "R", 50) | |
appender = [observed[1:2]] | |
spamwriter.writerows(appender) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##bargraphs code for GOOGLE SF | |
import matplotlib as mpl | |
mpl.use('TkAgg') | |
import matplotlib.pyplot as plt | |
plt.style.use('seaborn-ticks') | |
import numpy as np | |
import math | |
import pylab | |
#TO-DO: make graphs of enrichment for different cancer types | |
#remove leukemia from analysis | |
#make table with significance for different p-values, use python to do this | |
#order of data | |
#bladder, breast, cervical | |
#colon adenocarcinoma, glioblastoma multiforme, head and neck squamous cell carcinoma, pancreatic cancer | |
#liver hepatocellular carcinoma, lung cancer, pan-kidney, prostate cancer, skin cutaneous melanoma, stomach adenocarcinoma, thyroid carcinoma, & glioma | |
data_counts = {"bladder":130, "breast":983, "cervical":195, "colon adenocarcinoma":155, "glioblastoma multiforme":291, | |
"head and neck squamous cell carcinoma":280, "pancreatic cancer":151, "liver hepatocellular carcinoma":199, | |
"lung cancer":179, "pan-kidney":645, "prostate cancer":333, "skin cutaneous melanoma":346, "stomach adenocarcinoma":290, | |
"thyroid carcinoma":406, "glioma":577} | |
#name = "APOBEC" | |
#num = "1" | |
#data = [5.56, 2.248, 4.842, 0.4954, 0.4485, 2.535, 0.5659, 0.8392, 2.165, 0.9112, 1.1833, 1.248, 0.6218, 1.219, 0.4618] | |
#error = [2.9657, 2.7965, 3.523, 0.3219, 0.4156, 2.196, 0.6097, 0.416, 1.7013, 0.6839, 1.2873, 0.378, 0.5518, 2.0621, 0.4598] | |
#confidence = [0.51, 0.175, 0.494, 0.0507, 0.0478, 0.257, 0.0972, 0.0578, 0.249, 0.0529, 0.138, 0.0398, 0.0635, 0.201, 0.168] | |
#name = "UV" | |
#num = "2" | |
#data = [1.329, 0.9279, 1.19, 0.3407, 0.7913, 0.992, 0.5491, 0.7873, 0.9906, 0.7446, 0.7914, 1.556, 0.6514, 0.7334, 0.5506] | |
#error = [0.3155, 0.4582, 0.4478, 0.4423, .1269, 0.3195, 0.1996, 0.7873, 0.2789, 0.3155, 0.4416, 0.3491, 0.1855, 0.5577, 0.224] | |
#confidence = [0.0542, 0.0286, 0.0629, 0.0696, 0.0146, 0.0374, 0.0318, 0.109, 0.0409, 0.0243, 0.0474, 0.0368, 0.0213, 0.0542, 0.0183] | |
#name = "Tobacco" | |
#num = "3" | |
#data = [1.2023, 2.5748, 1.735, 0.4773, 2.0803, 2.1615, 2.8028, 3.43, 3.4835, 2.5836, 1.4285, 4.0497, 1.9877, 3.3275, 2.2154] | |
#error = [0.3724, 3.0696, 1.4121, 1.2318, 2.043, 2.1504, 3.8017, 2.1, 1.237, 2.1173, 2.488, 3.1189, 1.8416, 5.7224, 3.0512] | |
#confidence = [0.064, 0.192, 0.198, 0.194, 0.234, 0.252, 0.606, 0.292, 0.181, 0.163, 0.267, 0.329, 0.212, 0.557, 0.249] | |
#name = "AID" | |
#num = "4" | |
#data = [0.8035, 1.367, 1.4454, 0.4378, 1.4918, 1.6874, 1.8327, 1.1884, 1.3214, 1.3699, 1.6722, 1.8702, 2.0087, 1.1954, 1.4724] | |
#error = [0.7164, 1.4536, 1.5265, 0.4906, 1.2961, 1.2349, 1.5868, 0.6354, 0.5401, 1.0304, 1.6324, 0.9295, 1.2839, 1.5418,1.4426] | |
#confidence = [0.123, 0.0448, 0.214, 0.0772, 0.149, 0.145, 0.253, 0.0883, 0.0791, 0.0795, 0.175, 0.0979, 0.148, 0.15, 0.118] | |
counts = {"bladder":130, "breast":983, "cervical":195, "colon adenocarcinoma":155, "glioblastoma multiforme":291, | |
"head and neck squamous cell carcinoma":280, "pancreatic cancer":151, "liver hepatocellular carcinoma":199, | |
"lung cancer":179, "pan-kidney":645, "prostate cancer":333, "skin cutaneous melanoma":346, "stomach adenocarcinoma":290, | |
"thyroid carcinoma":406, "glioma":577} | |
name = "Aging" | |
num = "5" | |
data = [6.2851, 10.3227, 8.2077, 0.913, 16.4029, 9.0951, 14.8916, 7.0081, 6.349, 6.8536, 6.3962, 4.8882, 14.8384, 9.5127, 17.5606] | |
error = [2.4605, 6.126, 4.0094, 1.1901, 4.8837, 3.4547, 4.1749, 3.2267, 1.9739, 3.6531, 5.687, 3.3729, 4.3531, 8.4229, 8.1927] | |
confidence = [0.423, 0.383, 0.563, 0.187, 0.561, 0.405, 0.666, 0.448, 0.289, 0.282, 0.61, 0.355, 0.501, 0.819, 0.668] | |
cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver', | |
'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma'] | |
y_pos = np.arange(len(cancer_types)) | |
fig, ax = plt.subplots() | |
plt.size = (1200, 1200) | |
bar = ax.bar(cancer_types, data, align="center", yerr=confidence, alpha = 0.5) | |
ax.set_xticks(y_pos, cancer_types) | |
ax.set_title("Graph " + num + "." + " " + name + " Enrichment for Cancer Types", y=1.05) | |
ax.set_ylabel(name + " Enrichment", labelpad=10) | |
ax.set_xlabel("Cancer Type",labelpad=10) | |
threshold = 1.0 | |
above_threshold = min(data) | |
below_threshold = max(data) | |
############### APOBEC GRAPH SETTINGS ############## | |
#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "7 out of 15 Cancers Had Significant " + name + " Enrichment") | |
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0") | |
#plt.yticks(np.arange(-1, 7, 1)) | |
############### APOBEC GRAPH SETTINGS ############## | |
############### UV GRAPH SETTINGS ############## | |
#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "3 out of 15 Cancers Had Significant " + name + " Enrichment") | |
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0") | |
#plt.yticks(np.arange(0, 2.5, 0.5)) | |
############### UV GRAPH SETTINGS ############## | |
############### Tobacco GRAPH SETTINGS ############## | |
#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "14 out of 15 Cancers Had Significant " + name + " Enrichment") | |
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0") | |
#plt.yticks(np.arange(0, 6, 0.5)) | |
############### Tobacco GRAPH SETTINGS ############## | |
############### AID GRAPH SETTINGS ############## | |
#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "13 out of 15 Cancers Had Significant " + name + " Enrichment") | |
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0") | |
#plt.yticks(np.arange(0, 3.5, 0.5)) | |
############### AID GRAPH SETTINGS ############## | |
############### Aging GRAPH SETTINGS ############## | |
ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "14 out of 15 Cancers Had Significant " + name + " Enrichment") | |
ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0") | |
plt.yticks(np.arange(0, 24, 2)) | |
############### Aging GRAPH SETTINGS ############## | |
ax.legend() | |
for tick in ax.get_xticklabels(): | |
tick.set_rotation(90) | |
plt.tight_layout() | |
ax.yaxis.set_ticks_position('left') | |
ax.xaxis.set_ticks_position('bottom') | |
plt.savefig(name + '_enrich.png', dpi = 500) | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment