Skip to content

Instantly share code, notes, and snippets.

@afrendeiro
Created August 9, 2016 17:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afrendeiro/474cb1d9be3176ae9ac20b55c6369ac9 to your computer and use it in GitHub Desktop.
Save afrendeiro/474cb1d9be3176ae9ac20b55c6369ac9 to your computer and use it in GitHub Desktop.
Use Enrichr's API to get enrichments of a gene set
#! /usr/bin/env python
import argparse
import pandas as pd
import json
import requests
default_gene_set_libraries = [
'GO_Biological_Process_2015',
"ChEA_2015",
"KEGG_2016",
"ESCAPE",
"Epigenomics_Roadmap_HM_ChIP-seq",
"ENCODE_TF_ChIP-seq_2015",
"ENCODE_Histone_Modifications_2015",
"OMIM_Expanded",
"TF-LOF_Expression_from_GEO",
"Single_Gene_Perturbations_from_GEO_down",
"Single_Gene_Perturbations_from_GEO_up",
"Disease_Perturbations_from_GEO_down",
"Disease_Perturbations_from_GEO_up",
"Drug_Perturbations_from_GEO_down",
"Drug_Perturbations_from_GEO_up",
"WikiPathways_2016",
"Reactome_2016",
"BioCarta_2016",
"NCI-Nature_2016"
]
parser = argparse.ArgumentParser()
parser.add_argument(type=str, dest="input_file", help="Text file with one gene name per line.")
parser.add_argument(
"-o", "--output-file", type=str, dest="output_file",
default="enrichr_output.csv", help="Output file name. Default='enrichr_output.csv'")
parser.add_argument(
"-g", "--gene-sets", type=str, dest="gene_set_libraries",
default=",".join(default_gene_set_libraries),
help="Comma-separated list of gene set libraries to use.\nDefault='{}'".format(",".join(default_gene_set_libraries)))
args = parser.parse_args()
gene_names = pd.read_csv(args.input_file, header=None)
if len(gene_names) > 1:
gene_names = gene_names.squeeze().unique().tolist()
elif len(gene_names) == 1:
gene_names = [gene_names.squeeze()]
else:
import sys
sys.exit(0)
print("Gene set has %i genes" % len(gene_names))
ENRICHR_ADD = 'http://amp.pharm.mssm.edu/Enrichr/addList'
ENRICHR_RETRIEVE = 'http://amp.pharm.mssm.edu/Enrichr/enrich'
query_string = '?userListId=%s&backgroundType=%s'
results = pd.DataFrame()
for gene_set_library in args.gene_set_libraries.split(","):
print("Running Enrichr on %s gene set library." % gene_set_library)
# Build payload with gene list
attr = "\n".join(gene_names)
payload = {
'list': (None, attr),
'description': (None, gene_set_library)
}
# Request adding gene set
response = requests.post(ENRICHR_ADD, files=payload)
if not response.ok:
raise Exception('Error analyzing gene list')
# Track gene set ID
user_list_id = json.loads(response.text)['userListId']
# Request enriched sets in gene set
response = requests.get(
ENRICHR_RETRIEVE + query_string % (user_list_id, gene_set_library)
)
if not response.ok:
raise Exception('Error fetching enrichment results')
# Get enriched sets in gene set
res = json.loads(response.text)
# If there's no enrichemnt, continue
if len(res[res.keys()[0]]) < 1:
continue
# Put in dataframe
res = pd.DataFrame([pd.Series(s) for s in res[gene_set_library]])
res.columns = ["rank", "description", "p_value", "z_score", "combined_score", "genes", "adjusted_p_value"]
# Remember gene set library used
res["gene_set_library"] = gene_set_library
# Append to master dataframe
results = results.append(res, ignore_index=True)
results.to_csv(args.output_file, index=False, encoding='utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment