Created
August 9, 2016 17:20
-
-
Save afrendeiro/474cb1d9be3176ae9ac20b55c6369ac9 to your computer and use it in GitHub Desktop.
Use Enrichr's API to get enrichments of a gene set
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
import argparse | |
import pandas as pd | |
import json | |
import requests | |
default_gene_set_libraries = [ | |
'GO_Biological_Process_2015', | |
"ChEA_2015", | |
"KEGG_2016", | |
"ESCAPE", | |
"Epigenomics_Roadmap_HM_ChIP-seq", | |
"ENCODE_TF_ChIP-seq_2015", | |
"ENCODE_Histone_Modifications_2015", | |
"OMIM_Expanded", | |
"TF-LOF_Expression_from_GEO", | |
"Single_Gene_Perturbations_from_GEO_down", | |
"Single_Gene_Perturbations_from_GEO_up", | |
"Disease_Perturbations_from_GEO_down", | |
"Disease_Perturbations_from_GEO_up", | |
"Drug_Perturbations_from_GEO_down", | |
"Drug_Perturbations_from_GEO_up", | |
"WikiPathways_2016", | |
"Reactome_2016", | |
"BioCarta_2016", | |
"NCI-Nature_2016" | |
] | |
parser = argparse.ArgumentParser() | |
parser.add_argument(type=str, dest="input_file", help="Text file with one gene name per line.") | |
parser.add_argument( | |
"-o", "--output-file", type=str, dest="output_file", | |
default="enrichr_output.csv", help="Output file name. Default='enrichr_output.csv'") | |
parser.add_argument( | |
"-g", "--gene-sets", type=str, dest="gene_set_libraries", | |
default=",".join(default_gene_set_libraries), | |
help="Comma-separated list of gene set libraries to use.\nDefault='{}'".format(",".join(default_gene_set_libraries))) | |
args = parser.parse_args() | |
gene_names = pd.read_csv(args.input_file, header=None) | |
if len(gene_names) > 1: | |
gene_names = gene_names.squeeze().unique().tolist() | |
elif len(gene_names) == 1: | |
gene_names = [gene_names.squeeze()] | |
else: | |
import sys | |
sys.exit(0) | |
print("Gene set has %i genes" % len(gene_names)) | |
ENRICHR_ADD = 'http://amp.pharm.mssm.edu/Enrichr/addList' | |
ENRICHR_RETRIEVE = 'http://amp.pharm.mssm.edu/Enrichr/enrich' | |
query_string = '?userListId=%s&backgroundType=%s' | |
results = pd.DataFrame() | |
for gene_set_library in args.gene_set_libraries.split(","): | |
print("Running Enrichr on %s gene set library." % gene_set_library) | |
# Build payload with gene list | |
attr = "\n".join(gene_names) | |
payload = { | |
'list': (None, attr), | |
'description': (None, gene_set_library) | |
} | |
# Request adding gene set | |
response = requests.post(ENRICHR_ADD, files=payload) | |
if not response.ok: | |
raise Exception('Error analyzing gene list') | |
# Track gene set ID | |
user_list_id = json.loads(response.text)['userListId'] | |
# Request enriched sets in gene set | |
response = requests.get( | |
ENRICHR_RETRIEVE + query_string % (user_list_id, gene_set_library) | |
) | |
if not response.ok: | |
raise Exception('Error fetching enrichment results') | |
# Get enriched sets in gene set | |
res = json.loads(response.text) | |
# If there's no enrichemnt, continue | |
if len(res[res.keys()[0]]) < 1: | |
continue | |
# Put in dataframe | |
res = pd.DataFrame([pd.Series(s) for s in res[gene_set_library]]) | |
res.columns = ["rank", "description", "p_value", "z_score", "combined_score", "genes", "adjusted_p_value"] | |
# Remember gene set library used | |
res["gene_set_library"] = gene_set_library | |
# Append to master dataframe | |
results = results.append(res, ignore_index=True) | |
results.to_csv(args.output_file, index=False, encoding='utf-8') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment