Skip to content

Instantly share code, notes, and snippets.

@afrendeiro
Last active May 3, 2016 09:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afrendeiro/e9b7b3ce380c070d7cc6b550c1cc07db to your computer and use it in GitHub Desktop.
Save afrendeiro/e9b7b3ce380c070d7cc6b550c1cc07db to your computer and use it in GitHub Desktop.
Function to use Enrichr's API
def enrichr(dataframe, gene_set_libraries=None, kind="genes"):
    """
    Query Enrichr's web API with the genes (or regions) held in a dataframe.

    For each gene set library the input list is uploaded to the ``addList``
    endpoint and the enrichment results for that library are then fetched
    from the ``enrich`` endpoint. The per-library results are stacked into a
    single dataframe.

    Note from the original author: only gene lists are currently supported
    through the API; the ``"regions"`` payload is built but may not be
    accepted by the service.

    :param dataframe: pandas DataFrame with the input. Must contain a
        ``gene_name`` column when ``kind="genes"``, or ``chrom``, ``start``
        and ``end`` columns when ``kind="regions"``.
    :param gene_set_libraries: Iterable of Enrichr gene set library names.
        ``None`` selects a built-in default list.
    :param kind: Either ``"genes"`` or ``"regions"``.
    :returns: DataFrame with one row per enriched term and a
        ``gene_set_library`` column naming the library it came from. Empty
        if no library returned any term.
    :raises ValueError: If ``kind`` is not ``"genes"`` or ``"regions"``.
    :raises Exception: If either HTTP request does not succeed.
    """
    import json

    import pandas as pd

    ENRICHR_ADD = 'http://amp.pharm.mssm.edu/Enrichr/addList'
    ENRICHR_RETRIEVE = 'http://amp.pharm.mssm.edu/Enrichr/enrich'
    # Names for the leading fields of every result entry, in positional order.
    column_names = [
        "rank", "description", "p_value", "z_score",
        "combined_score", "genes", "adjusted_p_value",
    ]

    # Fail early with a clear message instead of an unbound-variable error.
    if kind not in ("genes", "regions"):
        raise ValueError(
            "Unsupported kind %r: expected 'genes' or 'regions'." % (kind,)
        )

    if gene_set_libraries is None:
        gene_set_libraries = [
            'GO_Biological_Process_2015',
            "ChEA_2015",
            "KEGG_2016",
            "WikiPathways_2016",
            "Reactome_2016",
            "BioCarta_2016",
            "NCI-Nature_2016",
        ]
    gene_set_libraries = list(gene_set_libraries)

    # The uploaded list does not depend on the library, so build it once.
    if kind == "genes":
        # One gene name per line; missing names are dropped.
        attr = "\n".join(dataframe["gene_name"].dropna().astype(str).tolist())
    else:
        # One tab-separated "chrom start end" record per line (BED-like).
        attr = "\n".join(
            "\t".join(str(value) for value in row)
            for row in dataframe[['chrom', 'start', 'end']].itertuples(index=False)
        )

    # Nothing to query: return the same empty frame the loop would produce.
    if not gene_set_libraries:
        return pd.DataFrame()

    # Only needed once we actually talk to the service.
    import requests

    frames = []
    for gene_set_library in gene_set_libraries:
        print("Using enricher on %s gene set library." % gene_set_library)

        # Multipart form fields; the (None, value) tuples send plain fields
        # without a filename.
        payload = {
            'list': (None, attr),
            'description': (None, gene_set_library)
        }
        # Request adding gene set
        response = requests.post(ENRICHR_ADD, files=payload)
        if not response.ok:
            raise Exception('Error adding gene list')

        # Track gene set ID
        user_list_id = json.loads(response.text)['userListId']

        # Request enriched sets in gene set; let requests URL-encode the query.
        response = requests.get(
            ENRICHR_RETRIEVE,
            params={
                'userListId': user_list_id,
                'backgroundType': gene_set_library,
            },
        )
        if not response.ok:
            raise Exception('Error fetching enrichment results')

        # Get enriched sets in gene set
        res = json.loads(response.text)

        # If there's no enrichment for this library, move on to the next one.
        entries = res.get(gene_set_library)
        if not entries:
            continue

        # Each entry is a positional record. Name the leading fields; any
        # additional trailing fields the service may return keep their
        # positional integer labels instead of causing a length mismatch.
        library_frame = pd.DataFrame(entries).rename(
            columns=dict(enumerate(column_names))
        )
        # Remember gene set library used
        library_frame["gene_set_library"] = gene_set_library
        frames.append(library_frame)

    if not frames:
        return pd.DataFrame()
    # Single concatenation instead of repeated DataFrame.append calls.
    return pd.concat(frames, ignore_index=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment