Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Download gene set libraries from Enrichr in GMT format for ssGSEA - done in parallel with async
from pathlib import Path
import parmap
import requests
def update(gsll):
    """Return the names in ``gsll`` whose ``.gmt`` file is not yet on disk.

    Args:
        gsll: Iterable of gene set library names.

    Returns:
        A list, in input order, of the names for which
        ``output_dir / (name + ".gmt")`` does not exist.
    """
    pending = []
    for name in gsll:
        target = output_dir / (name + ".gmt")
        if target.exists():
            continue
        pending.append(name)
    return pending
def get(gsl):
    """Download one gene set library and store it as ``<gsl>.gmt``.

    The request goes to ``url + gsl`` and the result is stored under
    ``output_dir``. Nothing is written when the target file already exists
    or when the response status is not OK.

    Args:
        gsl: Name of the gene set library; also the stem of the output file.

    Returns:
        None.
    """
    f = output_dir / (gsl + ".gmt")
    if f.exists():
        return
    with requests.get(url + gsl) as req:
        if not req.ok:
            return
        payload = req.content
    # Write the raw response bytes (no decode/re-encode round trip through the
    # platform's default text encoding) to a sibling temporary file, then
    # rename it into place. The final ".gmt" path therefore only ever appears
    # once the content is complete, so an interrupted write is never mistaken
    # for a finished download by the ``exists()`` checks.
    partial = f.with_name(f.name + ".part")
    partial.write_bytes(payload)
    partial.replace(f)
# Directory that receives one "<library>.gmt" file per gene set library.
output_dir = Path("gene_set_libraries")
output_dir.mkdir(exist_ok=True)
# Request URL prefix; the library name is appended to it.
url = "https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName="
# Text file listing the library names to download, one per line.
gsllf = "gsls.enrichr.txt"

# Guarded so that a worker process which re-imports this module does not
# start the download loop again; only the definitions above are needed there.
if __name__ == "__main__":
    # Read the list with a context manager so the handle is closed, and drop
    # blank lines / stray whitespace so they are not requested as libraries.
    with open(gsllf, "r") as handle:
        names = [line.strip() for line in handle if line.strip()]
    gsll = update(names)

    # Retry until everything is downloaded, but stop after several rounds in
    # a row without any new file: a library that keeps failing would
    # otherwise make this loop spin forever.
    max_stalled_rounds = 3
    stalled_rounds = 0
    while gsll:
        req = parmap.map_async(get, gsll)
        try:
            req.get()
        except requests.exceptions.ChunkedEncodingError:
            # A transfer was cut short; the missing files are retried below.
            pass
        remaining = update(gsll)
        if len(remaining) < len(gsll):
            stalled_rounds = 0
        else:
            stalled_rounds += 1
            if stalled_rounds >= max_stalled_rounds:
                raise SystemExit(
                    "Giving up; could not download: " + ", ".join(remaining)
                )
        gsll = remaining

    # To run serially instead, call get(gsl) for each gsl in gsll.
Genes_Associated_with_NIH_Grants
Cancer_Cell_Line_Encyclopedia
Achilles_fitness_decrease
Achilles_fitness_increase
Aging_Perturbations_from_GEO_down
Aging_Perturbations_from_GEO_up
Allen_Brain_Atlas_down
Allen_Brain_Atlas_up
ARCHS4_Cell-lines
ARCHS4_IDG_Coexp
ARCHS4_Kinases_Coexp
ARCHS4_TFs_Coexp
ARCHS4_Tissues
BioCarta_2013
BioCarta_2015
BioCarta_2016
BioPlanet_2019
BioPlex_2017
CCLE_Proteomics_2020
ChEA_2013
ChEA_2015
ChEA_2016
Chromosome_Location
Chromosome_Location_hg19
ClinVar_2019
CORUM
COVID-19_Related_Gene_Sets
Data_Acquisition_Method_Most_Popular_Genes
dbGaP
DepMap_WG_CRISPR_Screens_Broad_CellLines_2019
DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019
Disease_Perturbations_from_GEO_down
Disease_Perturbations_from_GEO_up
Disease_Signatures_from_GEO_down_2014
Disease_Signatures_from_GEO_up_2014
DisGeNET
Drug_Perturbations_from_GEO_2014
Drug_Perturbations_from_GEO_down
Drug_Perturbations_from_GEO_up
DrugMatrix
DSigDB
Elsevier_Pathway_Collection
ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X
ENCODE_Histone_Modifications_2013
ENCODE_Histone_Modifications_2015
ENCODE_TF_ChIP-seq_2014
ENCODE_TF_ChIP-seq_2015
Enrichr_Libraries_Most_Popular_Genes
Enrichr_Submissions_TF-Gene_Coocurrence
Enrichr_Users_Contributed_Lists_2020
Epigenomics_Roadmap_HM_ChIP-seq
ESCAPE
Gene_Perturbations_from_GEO_down
Gene_Perturbations_from_GEO_up
GeneSigDB
Genome_Browser_PWMs
GO_Biological_Process_2013
GO_Biological_Process_2015
GO_Biological_Process_2017
GO_Biological_Process_2017b
GO_Biological_Process_2018
GO_Cellular_Component_2013
GO_Cellular_Component_2015
GO_Cellular_Component_2017
GO_Cellular_Component_2017b
GO_Cellular_Component_2018
GO_Molecular_Function_2013
GO_Molecular_Function_2015
GO_Molecular_Function_2017
GO_Molecular_Function_2017b
GO_Molecular_Function_2018
GTEx_Tissue_Sample_Gene_Expression_Profiles_down
GTEx_Tissue_Sample_Gene_Expression_Profiles_up
GWAS_Catalog_2019
HMDB_Metabolites
HMS_LINCS_KinomeScan
HomoloGene
Human_Gene_Atlas
Human_Phenotype_Ontology
HumanCyc_2015
HumanCyc_2016
huMAP
InterPro_Domains_2019
Jensen_COMPARTMENTS
Jensen_DISEASES
Jensen_TISSUES
KEA_2013
KEA_2015
KEGG_2013
KEGG_2015
KEGG_2016
KEGG_2019_Human
KEGG_2019_Mouse
Kinase_Perturbations_from_GEO_down
Kinase_Perturbations_from_GEO_up
L1000_Kinase_and_GPCR_Perturbations_down
L1000_Kinase_and_GPCR_Perturbations_up
Ligand_Perturbations_from_GEO_down
Ligand_Perturbations_from_GEO_up
LINCS_L1000_Chem_Pert_down
LINCS_L1000_Chem_Pert_up
LINCS_L1000_Ligand_Perturbations_down
LINCS_L1000_Ligand_Perturbations_up
lncHUB_lncRNA_Co-Expression
MCF7_Perturbations_from_GEO_down
MCF7_Perturbations_from_GEO_up
MGI_Mammalian_Phenotype_2013
MGI_Mammalian_Phenotype_2017
MGI_Mammalian_Phenotype_Level_3
MGI_Mammalian_Phenotype_Level_4
MGI_Mammalian_Phenotype_Level_4_2019
Microbe_Perturbations_from_GEO_down
Microbe_Perturbations_from_GEO_up
miRTarBase_2017
Mouse_Gene_Atlas
MSigDB_Computational
MSigDB_Hallmark_2020
MSigDB_Oncogenic_Signatures
NCI-60_Cancer_Cell_Lines
NCI-Nature_2015
NCI-Nature_2016
NIH_Funded_PIs_2017_AutoRIF_ARCHS4_Predictions
NIH_Funded_PIs_2017_GeneRIF_ARCHS4_Predictions
NIH_Funded_PIs_2017_Human_AutoRIF
NIH_Funded_PIs_2017_Human_GeneRIF
NURSA_Human_Endogenous_Complexome
Old_CMAP_down
Old_CMAP_up
OMIM_Disease
OMIM_Expanded
Panther_2015
Panther_2016
Pfam_Domains_2019
Pfam_InterPro_Domains
PheWeb_2019
Phosphatase_Substrates_from_DEPOD
PPI_Hub_Proteins
ProteomicsDB_2020
Rare_Diseases_AutoRIF_ARCHS4_Predictions
Rare_Diseases_AutoRIF_Gene_Lists
Rare_Diseases_GeneRIF_ARCHS4_Predictions
Rare_Diseases_GeneRIF_Gene_Lists
Reactome_2013
Reactome_2015
Reactome_2016
RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO
SILAC_Phosphoproteomics
SubCell_BarCode
SysMyo_Muscle_Gene_Sets
Table_Mining_of_CRISPR_Studies
TargetScan_microRNA
TargetScan_microRNA_2017
TF-LOF_Expression_from_GEO
TF_Perturbations_Followed_by_Expression
TG_GATES_2020
Tissue_Protein_Expression_from_Human_Proteome_Map
Tissue_Protein_Expression_from_ProteomicsDB
Transcription_Factor_PPIs
TRANSFAC_and_JASPAR_PWMs
TRRUST_Transcription_Factors_2019
UK_Biobank_GWAS_v1
Virus-Host_PPI_P-HIPSTer_2020
Virus_Perturbations_from_GEO_down
Virus_Perturbations_from_GEO_up
VirusMINT
WikiPathways_2013
WikiPathways_2015
WikiPathways_2016
WikiPathways_2019_Human
WikiPathways_2019_Mouse
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment