Skip to content

Instantly share code, notes, and snippets.

@cplaisier
Created December 2, 2021 20:31
Show Gist options
  • Save cplaisier/9333172a840885bb79e3f9bc80d2da70 to your computer and use it in GitHub Desktop.
Save cplaisier/9333172a840885bb79e3f9bc80d2da70 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 19 17:22:40 2021
@author: Pamela Appiah
"""
#Load Up Data
import pandas as pd
from scipy.stats import hypergeom
from statsmodels.stats import multitest
from matplotlib import pyplot as plt
from matplotlib_venn import venn2, venn3
from matplotlib.backends.backend_pdf import PdfPages
# --- Transcription factors (TFs) and BRCA bicluster regulators ---
# TF identifiers come from the first column of a header-less CSV and are kept
# as strings so they compare equal to the string IDs parsed further down.
# (Presumably Entrez gene IDs, judging by the file name -- confirm.)
TFs = list(map(str, pd.read_csv('ProjectFinal/humanTFsFINAL_ENTREZ_GO_0003700.csv', header=None)[0]))
BRCA = pd.read_csv('ProjectFinal/postProcessed_BRCA_pita.csv', header=0, index_col=0)
# Background set: every gene named in any bicluster's space-separated
# 'Genes.1' field, restricted to the TF list. This is computed on the full
# table, i.e. before the significance filters below are applied.
All = {gene for gene_list in BRCA['Genes.1'] for gene in gene_list.split(' ')}.intersection(TFs)
#All = TFs
# Keep only biclusters passing both p-value cut-offs (<= 0.05).
BRCA = BRCA[BRCA['BRCA Var. Exp. First PC Perm. P-Value'] <= 0.05]  # Bicluster quality
BRCA = BRCA[BRCA['OS.covAgeSex_BRCA.p'] <= 0.05]  # Associated with patient survival
# Regulator ID is the text before the first ':' of each non-missing entry.
BRCA_TFs = {
    entry.split(':')[0]
    for entry in BRCA['TFBS_DB.Minimum Correlated_BRCA']
    if pd.notnull(entry)
}
# --- DisGeNET ---
# Gene IDs that the gene-disease association table links to
# 'Breast Carcinoma', converted to strings and restricted to the TF
# background set `All`.
DisGenett = pd.read_csv('ProjectFinal/all_gene_disease_pmid_associations.tsv', sep='\t')
Disgenet_TFs = {
    str(gene_id)
    for gene_id in DisGenett.loc[DisGenett['diseaseName'] == 'Breast Carcinoma', 'geneId']
}.intersection(All)
# --- DepMap ---
# Cell lines annotated with primary_disease == 'Breast Cancer'.
Depmap = pd.read_csv('ProjectFinal/sample_info.csv')
DepMap_Id = set(Depmap.loc[Depmap['primary_disease'] == 'Breast Cancer', 'DepMap_ID'])
# CRISPR gene-effect matrix; its row index is matched against the DepMap IDs
# above, and its columns are gene labels.
Crispr = pd.read_csv('ProjectFinal/CRISPR_gene_effect.csv', header=0, index_col=0)
Crispr_ID = set(Crispr.index).intersection(DepMap_Id)
# Select the breast-cancer rows with a boolean mask. Passing a set directly
# to .loc is deprecated in pandas 1.5 and raises TypeError in pandas >= 2.0.
BRCA_DepMap = Crispr.loc[Crispr.index.isin(Crispr_ID)]
# Keep genes whose minimum value across the selected cell lines is <= -1.
# The gene ID is the second space-separated token of the column label with
# the surrounding parentheses removed (presumably labels look like
# "SYMBOL (ID)" -- confirm against the CSV header).
BRCA_DepMap_TFs = {
    label.split(' ')[1].strip('(').strip(')')
    for label in BRCA_DepMap.columns[BRCA_DepMap.min() <= -1]
}.intersection(All)
# --- Hypergeometric over-representation tests ---
# For each reference TF set, test whether its overlap with BRCA_TFs is larger
# than expected by chance. Arguments follow scipy's convention:
#   k = observed overlap, M = population size,
#   n = number of "success" items in the population, N = number of draws.
# hypergeom.sf(x, M, n, N) is P(X > x), so the enrichment p-value
# P(X >= k) must be computed as sf(k - 1, ...); sf(k, ...) would leave out
# the observed overlap itself and under-state the p-value.
# NOTE(review): both reference sets were restricted to `All`, while the
# population size used here is len(TFs) -- confirm this is the intended
# background.

# DisGeNET TFs vs BRCA TFs
k = len(Disgenet_TFs.intersection(BRCA_TFs))
M = len(TFs)
n = len(Disgenet_TFs)
N = len(BRCA_TFs)
p_value = hypergeom.sf(k - 1, M, n, N)
print([k, M, n, N, p_value])

# DepMap TFs vs BRCA TFs
k = len(BRCA_DepMap_TFs.intersection(BRCA_TFs))
M = len(TFs)
n = len(BRCA_DepMap_TFs)
N = len(BRCA_TFs)
p_value = hypergeom.sf(k - 1, M, n, N)
print([k, M, n, N, p_value])
# --- Venn diagrams ---
# Each diagram is drawn on the current matplotlib figure, written as one page
# of its own PdfPages file, and the figure is then closed so the next diagram
# starts from a clean figure. The statements must be indented under the
# `with` so the page is saved before the PDF file is closed.

# Two sets: DepMap TFs vs BRCA TFs
with PdfPages('DepMap TFs vs BRCA_TFs') as pdf:
    venn2([BRCA_DepMap_TFs, BRCA_TFs], ('DepMap TFs', 'BRCA_TFs'))
    pdf.savefig()
    plt.close()

# Two sets: DisGeNET TFs vs BRCA TFs
with PdfPages('DisGenet TFs vs BRCA_TFs') as pdf:
    venn2([Disgenet_TFs, BRCA_TFs], ('DisGenet TFs', 'BRCA_TFs'))
    pdf.savefig()
    plt.close()

# Three sets: DepMap TFs vs DisGeNET TFs vs BRCA TFs
with PdfPages('DepMap TFs vs Disgenet TFs vs BRCA_TFs') as pdf:
    venn3([BRCA_DepMap_TFs, Disgenet_TFs, set(BRCA_TFs)], ('DepMap ID', 'DisGenet', 'BRCA_TFs'))
    pdf.savefig()
    plt.close()
# TFs common to all three sets (DisGeNET, BRCA regulators, DepMap).
# The original author's note records 9 TFs in this intersection.
I = Disgenet_TFs & BRCA_TFs & BRCA_DepMap_TFs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment