Created
December 2, 2021 20:31
-
-
Save cplaisier/9333172a840885bb79e3f9bc80d2da70 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 19 17:22:40 2021
@author: Pamela Appiah
"""
#Load Up Data
import pandas as pd
from scipy.stats import hypergeom
from statsmodels.stats import multitest
from matplotlib import pyplot as plt
from matplotlib_venn import venn2, venn3
from matplotlib.backends.backend_pdf import PdfPages
# --- TFs from the BRCA bicluster table ---
# Reference TF list: first column of the header-less CSV, coerced to strings.
tf_table = pd.read_csv('ProjectFinal/humanTFsFINAL_ENTREZ_GO_0003700.csv', header=None)
TFs = [str(tf_id) for tf_id in tf_table[0]]
BRCA = pd.read_csv('ProjectFinal/postProcessed_BRCA_pita.csv', header=0, index_col=0)
# Background set: every space-separated entry of 'Genes.1' (taken from all
# rows, before any filtering) that also appears in the TF list.
bicluster_genes = set()
for gene_field in BRCA['Genes.1']:
    bicluster_genes.update(gene_field.split(' '))
All = bicluster_genes.intersection(TFs)
# Alternative background (disabled): All = TFs
# Keep only rows passing both p-value cutoffs, applied one after the other.
passes_quality = BRCA['BRCA Var. Exp. First PC Perm. P-Value'] <= 0.05  # Bicluster quality
BRCA = BRCA.loc[passes_quality]
passes_survival = BRCA['OS.covAgeSex_BRCA.p'] <= 0.05  # Associated with patient survival
BRCA = BRCA.loc[passes_survival]
# From each non-null entry keep the text before the first ':'.
# NOTE(review): presumably that prefix is the TF identifier -- confirm against the input file.
BRCA_TFs = {
    entry.split(':')[0]
    for entry in BRCA['TFBS_DB.Minimum Correlated_BRCA']
    if pd.notnull(entry)
}
# --- DisGeNET gene-disease associations ---
DisGenett = pd.read_csv('ProjectFinal/all_gene_disease_pmid_associations.tsv', delimiter='\t')
# Gene ids of rows whose disease name is exactly 'Breast Carcinoma', as
# strings, restricted to the background set `All`.
is_breast_carcinoma = DisGenett['diseaseName'] == 'Breast Carcinoma'
breast_carcinoma_gene_ids = DisGenett.loc[is_breast_carcinoma, 'geneId']
Disgenet_TFs = {str(gene_id) for gene_id in breast_carcinoma_gene_ids}.intersection(All)
# --- DepMap CRISPR gene-effect data for breast cancer cell lines ---
Depmap = pd.read_csv('ProjectFinal/sample_info.csv')
# DepMap ids of the samples annotated with primary disease 'Breast Cancer'.
DepMap_Id = set(Depmap.loc[Depmap['primary_disease']=='Breast Cancer','DepMap_ID'])
Crispr = pd.read_csv('ProjectFinal/CRISPR_gene_effect.csv', header=0, index_col=0)
# Breast cancer samples that are actually present in the CRISPR table.
Crispr_ID = set(Crispr.index).intersection(DepMap_Id)
# Select rows with a boolean mask: pandas no longer accepts a set as a .loc
# indexer (deprecated in 1.5, TypeError from 2.0). The mask also keeps the
# rows in file order instead of arbitrary set order.
BRCA_DepMap = Crispr.loc[Crispr.index.isin(Crispr_ID)]
# Columns whose minimum value over the selected rows is <= -1.
# NOTE(review): column labels are assumed to look like "SYMBOL (ID)"; the
# parenthesised second token is kept as the gene id -- confirm against the file.
dependent_columns = BRCA_DepMap.columns[BRCA_DepMap.min()<=-1]
BRCA_DepMap_TFs = {
    label.split(' ')[1].strip('(').strip(')') for label in dependent_columns
}.intersection(All)
# Disgenet TFs vs BRCA TFs: hypergeometric test for the size of the overlap.
#   k = observed overlap, M = population size,
#   n = number of "success" items in the population, N = number of draws.
k = len(Disgenet_TFs.intersection(BRCA_TFs))
# NOTE(review): M counts every entry of the TF list, while Disgenet_TFs was
# restricted to `All`; confirm that len(TFs) is the intended background.
M = len(TFs)
n = len(Disgenet_TFs)
N = len(BRCA_TFs)
# sf(x) is P(X > x); the p-value for an overlap of at least k is
# P(X >= k) = sf(k - 1). Using sf(k) would exclude the observed overlap itself.
p_value = hypergeom.sf(k - 1, M, n, N)
print([k,M,n,N,p_value])
# DepMap TFs vs BRCA TFs: hypergeometric test for the size of the overlap.
#   k = observed overlap, M = population size,
#   n = number of "success" items in the population, N = number of draws.
k = len(BRCA_DepMap_TFs.intersection(BRCA_TFs))
# NOTE(review): M counts every entry of the TF list, while BRCA_DepMap_TFs was
# restricted to `All`; confirm that len(TFs) is the intended background.
M = len(TFs)
n = len(BRCA_DepMap_TFs)
N = len(BRCA_TFs)
# sf(x) is P(X > x); the p-value for an overlap of at least k is
# P(X >= k) = sf(k - 1). Using sf(k) would exclude the observed overlap itself.
p_value = hypergeom.sf(k - 1, M, n, N)
print([k,M,n,N,p_value])
# Two-set Venn diagram (DepMap TFs vs BRCA TFs), saved as a single page
# through PdfPages.
with PdfPages('DepMap TFs vs BRCA_TFs') as pdf:
    depmap_brca_sets = [BRCA_DepMap_TFs, BRCA_TFs]
    depmap_brca_labels = ('DepMap TFs', 'BRCA_TFs')
    venn2(depmap_brca_sets, set_labels=depmap_brca_labels)
    pdf.savefig()  # writes the current figure
    plt.close()    # release the figure before the next plot
# Two-set Venn diagram (DisGenet TFs vs BRCA TFs), saved as a single page
# through PdfPages.
with PdfPages('DisGenet TFs vs BRCA_TFs') as pdf:
    disgenet_brca_sets = [Disgenet_TFs, BRCA_TFs]
    disgenet_brca_labels = ('DisGenet TFs', 'BRCA_TFs')
    venn2(disgenet_brca_sets, set_labels=disgenet_brca_labels)
    pdf.savefig()  # writes the current figure
    plt.close()    # release the figure before the next plot
# Three-set Venn diagram (DepMap vs DisGenet vs BRCA TFs), saved as a single
# page through PdfPages.
with PdfPages('DepMap TFs vs Disgenet TFs vs BRCA_TFs') as pdf:
    three_way_sets = [BRCA_DepMap_TFs, Disgenet_TFs, set(BRCA_TFs)]
    three_way_labels = ('DepMap ID', 'DisGenet', 'BRCA_TFs')
    venn3(three_way_sets, set_labels=three_way_labels)
    pdf.savefig()  # writes the current figure
    plt.close()    # release the figure
# TFs common to all three sets (DisGenet, BRCA and DepMap).
# The original note records 9 TFs in this intersection.
I = Disgenet_TFs & BRCA_TFs & BRCA_DepMap_TFs
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment