Last active
July 22, 2017 01:43
-
-
Save felipealbrecht/f81c14d2a52e9543567c to your computer and use it in GitHub Desktop.
Summarizing DNA methylation levels in liver tissue across H3K4me3 peaks regions derived from human liver cells
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xmlrpclib | |
import time | |
import os.path | |
user_key = 'anonymous_key' | |
url = "http://deepblue.mpi-inf.mpg.de/xmlrpc" | |
server = xmlrpclib.Server(url, encoding='UTF-8', allow_none=True) | |
print server.echo(user_key) | |
## List the H1-hESC samples from ENCODE | |
(status, H1_hESC_samples) = server.list_samples("H1-hESC", | |
{"source":"ENCODE"}, user_key) | |
H1_hESC_samples_ids = server.extract_ids(H1_hESC_samples)[1] | |
## List the peaks experiments from H3K27ac with the H1-hESC samples | |
(status, experiments) = server.list_experiments("hg19", "peaks", "H3K4me3", "", | |
H1_hESC_samples_ids, None, | |
"ENCODE", user_key) | |
## Find the experiment that come from a bed file | |
experiments_id = server.extract_ids(experiments)[1] | |
(status, exps_infos) = server.info(experiments_id, user_key) | |
h1_hESC_H3K4me3_experiment_name = [e["name"] for e in | |
exps_infos if e["extra_metadata"]['original_file_url'].endswith("bed.gz")] | |
if len(h1_hESC_H3K4me3_experiment_name) != 1: | |
print "It was expected only one h1_hESC_H3K4me3 experiment" | |
h1_hESC_H3K4me3_exp = h1_hESC_H3K4me3_experiment_name[0] | |
## Obtain the regions from the H1-hESC H3k4me3 peaks | |
(status, h1_hESC_H3K4me3) = server.select_experiments(h1_hESC_H3K4me3_exp, | |
None, None, None, | |
user_key) | |
## List the liver and hepatocyte experiments | |
ss, liver_experiments = server.list_experiments("hg19", "", | |
"DNA Methylation", | |
'liver', | |
None, "RRBS", "ENCODE", user_key) | |
liver_experiments_names = server.extract_names(liver_experiments)[1] | |
print liver_experiments_names | |
## RETRIEVE THE SUMMARY DATA | |
requests = [] | |
request_id_experiment = {} | |
## For each liver or hepatocyte experiment | |
for liver_experiment in liver_experiments_names: | |
print "Processing", liver_experiment | |
# Select the experiment regions | |
(status, q_liver_data) = server.select_experiments(liver_experiment, | |
None, None, None, | |
user_key) | |
# Perform aggregation | |
(status, agg_id) = server.aggregate(q_liver_data, h1_hESC_H3K4me3, | |
'SCORE', user_key) | |
# Filter the aggregate regions that summarized at least 1 region | |
(res, q_filter) = server.filter_regions(agg_id, "@AGG.COUNT", ">", "0", | |
"number", user_key) | |
# Request regions | |
status, req = server.get_regions(q_filter, | |
"CHROMOSOME,START,END,@AGG.MEAN,@AGG.COUNT,@AGG.MAX,@AGG.MIN", user_key) | |
print liver_experiment, status, req | |
request_id_experiment[req] = liver_experiment | |
requests.append(req) | |
print requests | |
print request_id_experiment | |
if not os.path.isdir("data/"): | |
os.mkdir("data/") | |
# Here we are waiting for the requests be completed | |
# We store the results in the files and print the request info in case of error | |
while len(requests) > 0: | |
for req in requests[:]: | |
(s, ss) = server.info(req, user_key) | |
if ss[0]["state"] == "done": | |
print ss[0] | |
print "getting data from " + ss[0]["_id"] | |
(s, data) = server.get_request_data(req, user_key) | |
with open("data/"+request_id_experiment[req]+".bed", 'wb') as f: | |
f.write(data) | |
requests.remove(req) | |
if ss[0]["state"] == "failed": | |
print ss | |
requests.remove(req) | |
time.sleep(1.0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment