Last active
May 3, 2016 09:08
-
-
Save afrendeiro/e9b7b3ce380c070d7cc6b550c1cc07db to your computer and use it in GitHub Desktop.
Function to use Enrichr's API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _enrichr_payload_text(dataframe, kind):
    """Serialise *dataframe* into the newline-separated text sent to Enrichr.

    For ``kind == "genes"`` the non-null values of the ``gene_name`` column are
    joined one per line.  For ``kind == "regions"`` each row of the ``chrom``,
    ``start`` and ``end`` columns becomes one tab-separated line.

    Raises:
        ValueError: if *kind* is neither ``"genes"`` nor ``"regions"``.
    """
    if kind == "genes":
        # Cast to str so non-string identifiers do not break str.join.
        return "\n".join(dataframe["gene_name"].dropna().astype(str).tolist())
    if kind == "regions":
        regions = dataframe[["chrom", "start", "end"]]
        # itertuples (unlike apply(axis=1)) also works on an empty frame.
        return "\n".join(
            "\t".join(str(value) for value in row)
            for row in regions.itertuples(index=False, name=None)
        )
    raise ValueError(
        "Unknown kind %r: expected 'genes' or 'regions'." % (kind,)
    )


def _enrichr_fetch(attr, gene_set_library):
    """Upload *attr* to Enrichr and return the raw result rows for one library.

    The list is registered through the ``addList`` endpoint (using the library
    name as its description) and the enrichment is then retrieved from the
    ``enrich`` endpoint with the returned ``userListId``.

    Returns:
        The list of result rows stored under *gene_set_library* in the JSON
        response, or an empty list when that key is missing or empty.

    Raises:
        Exception: if either HTTP request returns a non-OK status.
    """
    import json
    import requests

    ENRICHR_ADD = 'http://amp.pharm.mssm.edu/Enrichr/addList'
    ENRICHR_RETRIEVE = 'http://amp.pharm.mssm.edu/Enrichr/enrich'

    # (None, value) tuples make requests send plain multipart form fields
    # instead of file uploads.
    payload = {
        'list': (None, attr),
        'description': (None, gene_set_library),
    }

    # Request adding gene set
    response = requests.post(ENRICHR_ADD, files=payload)
    if not response.ok:
        raise Exception('Error adding gene list')

    # Track gene set ID
    user_list_id = json.loads(response.text)['userListId']

    # Request enriched sets in gene set
    response = requests.get(
        ENRICHR_RETRIEVE,
        params={
            'userListId': user_list_id,
            'backgroundType': gene_set_library,
        },
    )
    if not response.ok:
        raise Exception('Error fetching enrichment results')

    return json.loads(response.text).get(gene_set_library) or []


def enrichr(dataframe, gene_set_libraries=None, kind="genes"):
    """
    Run Enrichr on the genes (or regions) in `dataframe` and collect the results.

    The original note on this function states that only genes are currently
    supported through the API; a payload for `kind="regions"` is still built
    from the `chrom`, `start` and `end` columns.

    :param dataframe: pandas DataFrame with a `gene_name` column
        (`kind="genes"`) or `chrom`, `start`, `end` columns (`kind="regions"`).
    :param gene_set_libraries: iterable of Enrichr gene set library names.
        Defaults to a fixed selection of GO, ChEA, KEGG, WikiPathways,
        Reactome, BioCarta and NCI-Nature libraries.
    :param kind: either "genes" or "regions".
    :returns: DataFrame with one row per enriched term and the columns
        rank, description, p_value, z_score, combined_score, genes,
        adjusted_p_value and gene_set_library. An empty DataFrame is returned
        when no library yields any result.
    :raises ValueError: if `kind` is not "genes" or "regions".
    :raises Exception: if an Enrichr request returns a non-OK HTTP status.
    """
    import pandas as pd

    columns = [
        "rank", "description", "p_value", "z_score",
        "combined_score", "genes", "adjusted_p_value",
    ]

    if gene_set_libraries is None:
        gene_set_libraries = [
            'GO_Biological_Process_2015',
            "ChEA_2015",
            "KEGG_2016",
            "WikiPathways_2016",
            "Reactome_2016",
            "BioCarta_2016",
            "NCI-Nature_2016",
        ]

    # The payload does not depend on the library, so build (and validate) once.
    attr = _enrichr_payload_text(dataframe, kind)

    frames = []
    for gene_set_library in gene_set_libraries:
        print("Using enricher on %s gene set library." % gene_set_library)

        rows = _enrichr_fetch(attr, gene_set_library)

        # If there's no enrichment, continue
        if not rows:
            continue

        # Keep only the leading fields we have names for, so extra trailing
        # fields in the response do not break the column assignment.
        res = pd.DataFrame(
            [row[:len(columns)] for row in rows], columns=columns
        )

        # Remember gene set library used
        res["gene_set_library"] = gene_set_library
        frames.append(res)

    if not frames:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(frames, ignore_index=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment