Last active
July 22, 2017 01:51
-
-
Save felipealbrecht/c84d81c78eb06b6a1d95 to your computer and use it in GitHub Desktop.
Calculating the mRNA expression level for your favorite genes across all regulatory T cells and subsequent filtering regarding those genes regions where the value of the column named “score” is higher than a given threshold
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xmlrpclib | |
import time | |
import os | |
url = "http://deepblue.mpi-inf.mpg.de/xmlrpc" | |
user_key = "anonymous_key" | |
server = xmlrpclib.Server(url, allow_none=True) | |
# obtain genes regions | |
gene_names = ['CCR1', 'CD164', 'CD1D', 'CD2', 'CD34', 'CD3G', 'CD44'] | |
# select regions of selected genes | |
(status, q_genes) = server.select_genes(gene_names, None, "gencode v23", | |
None, None, None, user_key) | |
# filter regions that are protein coding | |
(status, q_genes_regions) = server.filter_regions(q_genes, | |
"@GENE_ATTRIBUTE(gene_type)", | |
"==", "protein_coding", | |
"string", user_key) | |
# select all T cell related biosources | |
biosources = ['Regulatory T cell'] | |
liver_biosource_names = [] | |
for biosource in biosources: | |
(status, biosources) = server.get_biosource_related(biosource, user_key) | |
liver_biosource_names += server.extract_names(biosources)[1] | |
# Obtain the mRNA experiments names | |
(status, experiments) = server.list_experiments("GRCh38", "signal", "mRNA", | |
liver_biosource_names, None, None, | |
"BLUEPRINT Epigenome", | |
user_key) | |
hsc_experiment_names = server.extract_names(experiments)[1] | |
###### | |
requests = [] | |
request_id_experiment = {} | |
# Perform the aggregation | |
for hsc_experiment_name in hsc_experiment_names: | |
(status, q_exp) = server.select_experiments(hsc_experiment_name, None, None, | |
None, user_key) | |
(status, q_agg) = server.aggregate(q_exp, q_genes_regions, "VALUE", user_key) | |
(status, q_filtered) = server.filter_regions(q_agg, "@AGG.MEAN", ">", "0", | |
"number", user_key) | |
print "Summarization and filtering of " + hsc_experiment_name | |
status, req = server.get_regions(q_filtered, | |
"CHROMOSOME,START,END,@AGG.MEAN,@AGG.MAX,@AGG.MIN", user_key) | |
print hsc_experiment_name, status, req | |
request_id_experiment[req] = hsc_experiment_name | |
requests.append(req) | |
# print status | |
requests_data = {} | |
print requests | |
print request_id_experiment | |
if not os.path.isdir("data/"): | |
os.mkdir("data/") | |
# Here we are waiting for the requests be completed | |
while len(requests) > 0: | |
print "It is still missing " + str(len(requests)) + " requests" | |
for req in requests[:]: | |
(s, ss) = server.info(req, user_key) | |
print ss | |
if ss[0]["state"] == "done": | |
print ss[0] | |
print "getting data from " + ss[0]["_id"] | |
(s, data) = server.get_request_data(req, user_key) | |
with open("data/" + request_id_experiment[req] + ".bed", 'wb') as f: | |
f.write(data) | |
requests.remove(req) | |
if ss[0]["state"] == "failed": | |
print ss | |
requests.remove(req) | |
time.sleep(1.0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment