Skip to content

Instantly share code, notes, and snippets.

@carolinecunningham
Last active September 19, 2020 04:28
Show Gist options
  • Save carolinecunningham/33128642dcb78aae0b67453ba5b2f6f8 to your computer and use it in GitHub Desktop.
Save carolinecunningham/33128642dcb78aae0b67453ba5b2f6f8 to your computer and use it in GitHub Desktop.
create graphs for motifs
####warning: only can be run locally & may depend on external code. this is just a place to store this
############### boxplot 1: APOBEC ####################
apobec_ints = {}
apobec = ["BLAC_apobec", "stomach_apobec", "breast_apobec", "cervix_apobec", "colonfin_apobec", "pancreas_apobec",
"glioblast_apobec", "HNCarcinoma_apobec", "pancreatic_apobec", "liver_apobec", "lung_apobec", "glioma_apobec",
"prostate_apobec", "skin_apobec", "thyroid_apobec"]
cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver',
'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma']
y_pos = np.arange(len(cancer_types))
for ky in apobec:
list_dir = []
for val in cancer_enrich_dir[ky]:
if len(val) > 0:
list_dir.append(float(val))
apobec_ints[ky] = list_dir
print(apobec_ints)
apobec_data = [apobec_ints["BLAC_apobec"], apobec_ints["breast_apobec"], apobec_ints["cervix_apobec"],
apobec_ints["colonfin_apobec"], apobec_ints["glioblast_apobec"], apobec_ints["HNCarcinoma_apobec"],
apobec_ints["pancreas_apobec"], apobec_ints["liver_apobec"], apobec_ints["lung_apobec"],
apobec_ints["pancreatic_apobec"], apobec_ints["prostate_apobec"], apobec_ints["skin_apobec"],
apobec_ints["stomach_apobec"], apobec_ints["thyroid_apobec"], apobec_ints["glioma_apobec"]]
fig, ax = plt.subplots()
testPlot = ax.boxplot(apobec_data, showfliers=False)
plt.xticks(y_pos, cancer_types)
ax.set_title("Graph 1. APOBEC Enrichment for Cancer Types", y=1.05)
ax.set_ylabel("APOBEC Enrichment", labelpad=10)
ax.set_xlabel("Cancer Type", labelpad=10)
ax.legend()
for tick in ax.get_xticklabels():
tick.set_rotation(90)
plt.xticks(np.arange(1, 16))
plt.yticks(np.arange(0, 15, 1))
plt.tight_layout()
plt.savefig('apobec_boxplot_enrich.png', dpi = 500)
############### boxplot 2: UV ####################
uv_ints = {}
uv = ["BLAC_uv", "stomach_uv", "breast_uv", "cervix_uv", "colon_uv", "pancreas_uv",
"glioblast_uv", "HNCarcinoma_uv", "pancreatic_uv", "liver_uv", "lung_uv", "glioma_uv",
"prostate_uv", "skin_uv", "thyroid_uv"]
cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver',
'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma']
y_pos = np.arange(len(cancer_types))
for ky in uv:
list_dir = []
for val in cancer_enrich_dir[ky]:
if len(val) > 0 and val != ",":
list_dir.append(float(val))
uv_ints[ky] = list_dir
uv_data = [uv_ints["BLAC_uv"], uv_ints["breast_uv"], uv_ints["cervix_uv"],
uv_ints["colon_uv"], uv_ints["glioblast_uv"], uv_ints["HNCarcinoma_uv"],
uv_ints["pancreas_uv"], uv_ints["liver_uv"], uv_ints["lung_uv"],
uv_ints["pancreatic_uv"], uv_ints["prostate_uv"], uv_ints["skin_uv"],
uv_ints["stomach_uv"], uv_ints["thyroid_uv"], uv_ints["glioma_uv"]]
fig, ax = plt.subplots()
testPlot = ax.boxplot(uv_data, showfliers=False)
plt.xticks(y_pos, cancer_types)
ax.set_title("Graph 2. UV Enrichment for Cancer Types", y=1.05)
ax.set_ylabel("UV Enrichment", labelpad=10)
ax.set_xlabel("Cancer Type", labelpad=10)
ax.legend()
for tick in ax.get_xticklabels():
tick.set_rotation(90)
plt.xticks(np.arange(1, 16))
plt.yticks(np.arange(0, 3, 0.25))
plt.tight_layout()
plt.savefig('uv_boxplot_enrich.png', dpi = 500)
############### boxplot 4: aid ####################
make_aid_boxplot()
############### boxplot 5: aging ####################
make_ag_boxplot()
make_ag_prop()
cancer_sig_files = []
for cancer_type in cancer_genomes_dirs:
#cancer_sig_files.append((cancer_type + '_correct_apobec_sig.csv'))
#cancer_sig_files.append((cancer_type + '_correct_tobacco_sig.csv'))
#cancer_sig_files.append((cancer_type + '_correct_uv_sig.csv'))
cancer_sig_files.append((cancer_type + '_correct_aid_sig.csv'))
#cancer_sig_files.append((cancer_type + '_correct_ag_sig.csv'))
for cancer_file in cancer_sig_files:
with open(cancer_file, 'r') as csvfile:
p_value_list = []
reader = csv.reader(csvfile, delimiter=',');
for row in reader:
#if "chi" in row[0]:
#p_value_list.append(1.)
#elif round(float(row[0]), 10) == 0:
#p_value_list.append(1E-10)
p_value_list.append(round(float(row[0]), 300))
summary_p_val = stats.combine_pvalues(p_value_list)
print(summary_p_val[1], cancer_file)
for cancer_type in cancer_genomes_dirs:
with open (cancer_type + '_correct_apobec_sig.csv', 'w') as csvfile:
spamwriter = csv.writer(csvfile, delimiter= ' ',
quotechar='|', quoting=csv.QUOTE_MINIMAL)
p_val_list = []
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
if file_i == ".DS_Store":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "TCW", 1, "C", "K", 50)
if 1.0 >= observed[3] >= 0.0:
appender = float(observed[3])
spamwriter.writerow([appender])
else:
if 1.0 >= observed[2] >= 0.0:
appender = float(observed[2])
spamwriter.writerow([appender])
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
if file_i == ".DS_Store":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "TCW", 1, "C", "K", 50)
appender = [observed[1:2]]
spamwriter.writerows(appender)
with open (cancer_type + '_correct_ag_sig.csv', 'w') as csvfile:
spamwriter = csv.writer(csvfile, delimiter= ' ',
quotechar='|', quoting=csv.QUOTE_MINIMAL)
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
if file_i == ".DS_Store":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "CG", 0, "C", "T", 50)
if 1.0 >= observed[3] >= 0.0:
appender = float(observed[3])
spamwriter.writerow([appender])
else:
if 1.0 >= observed[2] >= 0.0:
appender = float(observed[2])
spamwriter.writerow([appender])
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
if file_i == ".DS_Store":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "CG", 0, "C", "T", 50)
appender = [observed[1:2]]
spamwriter.writerows(appender)
with open(cancer_type + '_correct_uv_sig.csv', 'w') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=' ',
quotechar='|', quoting=csv.QUOTE_MINIMAL)
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
if file_i == ".DS_Store":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "YC", 1, "C", "T", 50)
if 1.0 >= observed[3] >= 0.0:
appender = float(observed[3])
spamwriter.writerow([appender])
else:
if 1.0 >= observed[2] >= 0.0:
appender = float(observed[2])
spamwriter.writerow([appender])
#logo = create_logo(mutations_with_context, "TCW", 1, "C", "K", 50)
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
if file_i == ".DS_Store":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "YC", 1, "C", "T", 50)
appender = [observed[1:2]]
spamwriter.writerows(appender)
with open(cancer_type + '_correct_aid_sig.csv', 'w') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=' ',
quotechar='|', quoting=csv.QUOTE_MINIMAL)
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
if file_i == ".DS_Store":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "NTW", 1, "T", "S", 50)
if 1.0 >= observed[3] >= 0.0:
appender = float(observed[3])
spamwriter.writerow([appender])
else:
if 1.0 >= observed[2] >= 0.0:
appender = float(observed[2])
spamwriter.writerow([appender])
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
if file_i == ".DS_Store":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "NTW", 1, "T", "S", 50)
appender = [observed[1:2]]
spamwriter.writerows(appender)
with open(cancer_type + '_correct_tobacco_sig.csv', 'w') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=' ',
quotechar='|', quoting=csv.QUOTE_MINIMAL)
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type):
if file_i == ".DS_Store" or file_i == "Manifest.txt":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/" + cancer_type + "/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "CG", 0, "C", "R", 50)
if 1.0 >= observed[3] >= 0.0:
appender = float(observed[3])
else:
if 1.0 >= observed[2] >= 0.0:
appender = float(observed[2])
spamwriter.writerow([appender])
for file_i in os.listdir("/Users/960148/PycharmProjects/Motifs/genomes/new_breast"):
if file_i == ".DS_Store" or file_i == "Manifest.txt":
continue
with open("/Users/960148/PycharmProjects/Motifs/genomes/new_breast/" + file_i) as file:
mut = file.read()
_, mutations_with_context, _ = read_mutations(mut, 'auto', 19)
observed = get_enrichment(mutations_with_context, "CG", 0, "C", "R", 50)
appender = [observed[1:2]]
spamwriter.writerows(appender)
##bargraphs code for GOOGLE SF
import matplotlib as mpl
mpl.use('TkAgg')
import matplotlib.pyplot as plt
plt.style.use('seaborn-ticks')
import numpy as np
import math
import pylab
#TO-DO: make graphs of enrichment for different cancer types
#remove leukemia from analysis
#make table with significance for different p-values, use python to do this
#order of data
#bladder, breast, cervical
#colon adenocarcinoma, glioblastoma multiforme, head and neck squamous cell carcinoma, pancreatic cancer
#liver hepatocellular carcinoma, lung cancer, pan-kidney, prostate cancer, skin cutaneous melanoma, stomach adenocarcinoma, thyroid carcinoma, & glioma
data_counts = {"bladder":130, "breast":983, "cervical":195, "colon adenocarcinoma":155, "glioblastoma multiforme":291,
"head and neck squamous cell carcinoma":280, "pancreatic cancer":151, "liver hepatocellular carcinoma":199,
"lung cancer":179, "pan-kidney":645, "prostate cancer":333, "skin cutaneous melanoma":346, "stomach adenocarcinoma":290,
"thyroid carcinoma":406, "glioma":577}
#name = "APOBEC"
#num = "1"
#data = [5.56, 2.248, 4.842, 0.4954, 0.4485, 2.535, 0.5659, 0.8392, 2.165, 0.9112, 1.1833, 1.248, 0.6218, 1.219, 0.4618]
#error = [2.9657, 2.7965, 3.523, 0.3219, 0.4156, 2.196, 0.6097, 0.416, 1.7013, 0.6839, 1.2873, 0.378, 0.5518, 2.0621, 0.4598]
#confidence = [0.51, 0.175, 0.494, 0.0507, 0.0478, 0.257, 0.0972, 0.0578, 0.249, 0.0529, 0.138, 0.0398, 0.0635, 0.201, 0.168]
#name = "UV"
#num = "2"
#data = [1.329, 0.9279, 1.19, 0.3407, 0.7913, 0.992, 0.5491, 0.7873, 0.9906, 0.7446, 0.7914, 1.556, 0.6514, 0.7334, 0.5506]
#error = [0.3155, 0.4582, 0.4478, 0.4423, .1269, 0.3195, 0.1996, 0.7873, 0.2789, 0.3155, 0.4416, 0.3491, 0.1855, 0.5577, 0.224]
#confidence = [0.0542, 0.0286, 0.0629, 0.0696, 0.0146, 0.0374, 0.0318, 0.109, 0.0409, 0.0243, 0.0474, 0.0368, 0.0213, 0.0542, 0.0183]
#name = "Tobacco"
#num = "3"
#data = [1.2023, 2.5748, 1.735, 0.4773, 2.0803, 2.1615, 2.8028, 3.43, 3.4835, 2.5836, 1.4285, 4.0497, 1.9877, 3.3275, 2.2154]
#error = [0.3724, 3.0696, 1.4121, 1.2318, 2.043, 2.1504, 3.8017, 2.1, 1.237, 2.1173, 2.488, 3.1189, 1.8416, 5.7224, 3.0512]
#confidence = [0.064, 0.192, 0.198, 0.194, 0.234, 0.252, 0.606, 0.292, 0.181, 0.163, 0.267, 0.329, 0.212, 0.557, 0.249]
#name = "AID"
#num = "4"
#data = [0.8035, 1.367, 1.4454, 0.4378, 1.4918, 1.6874, 1.8327, 1.1884, 1.3214, 1.3699, 1.6722, 1.8702, 2.0087, 1.1954, 1.4724]
#error = [0.7164, 1.4536, 1.5265, 0.4906, 1.2961, 1.2349, 1.5868, 0.6354, 0.5401, 1.0304, 1.6324, 0.9295, 1.2839, 1.5418,1.4426]
#confidence = [0.123, 0.0448, 0.214, 0.0772, 0.149, 0.145, 0.253, 0.0883, 0.0791, 0.0795, 0.175, 0.0979, 0.148, 0.15, 0.118]
counts = {"bladder":130, "breast":983, "cervical":195, "colon adenocarcinoma":155, "glioblastoma multiforme":291,
"head and neck squamous cell carcinoma":280, "pancreatic cancer":151, "liver hepatocellular carcinoma":199,
"lung cancer":179, "pan-kidney":645, "prostate cancer":333, "skin cutaneous melanoma":346, "stomach adenocarcinoma":290,
"thyroid carcinoma":406, "glioma":577}
name = "Aging"
num = "5"
data = [6.2851, 10.3227, 8.2077, 0.913, 16.4029, 9.0951, 14.8916, 7.0081, 6.349, 6.8536, 6.3962, 4.8882, 14.8384, 9.5127, 17.5606]
error = [2.4605, 6.126, 4.0094, 1.1901, 4.8837, 3.4547, 4.1749, 3.2267, 1.9739, 3.6531, 5.687, 3.3729, 4.3531, 8.4229, 8.1927]
confidence = [0.423, 0.383, 0.563, 0.187, 0.561, 0.405, 0.666, 0.448, 0.289, 0.282, 0.61, 0.355, 0.501, 0.819, 0.668]
cancer_types = ['Bladder', 'Breast', 'Cervical', 'Colon', 'Glioblastoma', 'Head & Neck', 'Pancreatic', 'Liver',
'Lung', 'Kidney','Prostate', 'Skin', 'Stomach', 'Thyroid', 'Glioma']
y_pos = np.arange(len(cancer_types))
fig, ax = plt.subplots()
plt.size = (1200, 1200)
bar = ax.bar(cancer_types, data, align="center", yerr=confidence, alpha = 0.5)
ax.set_xticks(y_pos, cancer_types)
ax.set_title("Graph " + num + "." + " " + name + " Enrichment for Cancer Types", y=1.05)
ax.set_ylabel(name + " Enrichment", labelpad=10)
ax.set_xlabel("Cancer Type",labelpad=10)
threshold = 1.0
above_threshold = min(data)
below_threshold = max(data)
############### APOBEC GRAPH SETTINGS ##############
#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "7 out of 15 Cancers Had Significant " + name + " Enrichment")
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
#plt.yticks(np.arange(-1, 7, 1))
############### APOBEC GRAPH SETTINGS ##############
############### UV GRAPH SETTINGS ##############
#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "3 out of 15 Cancers Had Significant " + name + " Enrichment")
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
#plt.yticks(np.arange(0, 2.5, 0.5))
############### UV GRAPH SETTINGS ##############
############### Tobacco GRAPH SETTINGS ##############
#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "14 out of 15 Cancers Had Significant " + name + " Enrichment")
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
#plt.yticks(np.arange(0, 6, 0.5))
############### Tobacco GRAPH SETTINGS ##############
############### AID GRAPH SETTINGS ##############
#ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "13 out of 15 Cancers Had Significant " + name + " Enrichment")
#ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
#plt.yticks(np.arange(0, 3.5, 0.5))
############### AID GRAPH SETTINGS ##############
############### Aging GRAPH SETTINGS ##############
ax.axhline(1.0, color = "orange", linewidth = 2, linestyle = 'dotted', label = "14 out of 15 Cancers Had Significant " + name + " Enrichment")
ax.axhline(1.0, linestyle = "None", label = "with 95% Confidence Intervals Above an Enrichment of 1.0")
plt.yticks(np.arange(0, 24, 2))
############### Aging GRAPH SETTINGS ##############
ax.legend()
for tick in ax.get_xticklabels():
tick.set_rotation(90)
plt.tight_layout()
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
plt.savefig(name + '_enrich.png', dpi = 500)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment