Last active
December 6, 2016 10:28
-
-
Save felipealbrecht/bca3de38a5d3e1cef7ae to your computer and use it in GitHub Desktop.
Aggregate the methylation data signal of the liver data experiments by the H-ESC H3K4me3 peaks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Aggregate the methylation data signal of the liver data experiments by the H-ESC H3K4me3 peaks | |
## Felipe Albrecht - | |
import xmlrpclib | |
import time | |
user_key = 'USER_KEY' | |
url = "http://deepblue.mpi-inf.mpg.de/xmlrpc" | |
server = xmlrpclib.Server(url, encoding='UTF-8', allow_none=True) | |
print server.echo(user_key) | |
## OBTAIN THE ENCODE H1-hESC SAMPLE | |
(status, H1_hESC_samples) = server.list_samples("H1-hESC", {"source":"ENCODE"}, user_key) | |
H1_hESC_samples_ids = [sample[0] for sample in H1_hESC_samples] | |
## GET ALL EXPERIMENTS WITH EPIGENETIC MARK H3K4ME3 | |
(status, experiments) = server.list_experiments("hg19", "H3K4me3", H1_hESC_samples_ids, None, "ENCODE", user_key) | |
## GET THE EXPERIMENT NAME | |
experiments_id = [experiment[0] for experiment in experiments] | |
(status, experiments_info) = server.info(experiments_id, user_key) | |
h1_hESC_H3K4me3_experiment_name = [experiment["name"] for experiment in experiments_info if experiment["data_type"] == "peaks" and experiment["extra_metadata"]['original_file_url'].endswith("bed.gz")] | |
if len(h1_hESC_H3K4me3_experiment_name) != 1: | |
print "It was expected one h1_hESC_H3K4me3 experiment" | |
h1_hESC_H3K4me3_experiment_name = h1_hESC_H3K4me3_experiment_name[0] | |
## SELECT THE DATA FROM THE GIVEN EXPERIMENT | |
(status, q_h1_hESC_H3K4me3_peaks) = server.select_regions(h1_hESC_H3K4me3_experiment_name, None, None, None, None, None, None, None, None, user_key) | |
print q_h1_hESC_H3K4me3_peaks | |
(s, r) = server.count_regions(q_h1_hESC_H3K4me3_peaks, user_key) | |
(s, status) = server.info(r, user_key) | |
print status | |
while (status[0]["state"] != "done"): | |
time.sleep(1.0) | |
(s, status) = server.info(r, user_key) | |
print server.get_request_data(r, user_key) | |
## SELECT THE LIVER EXPERIMENTS | |
(status, biosources) = server.get_biosource_children('liver', user_key) | |
liver_biosource_names = [biosource[1] for biosource in biosources] | |
status, liver_samples = server.list_samples(liver_biosource_names, {"source": "ENCODE"}, user_key) | |
liver_samples_ids = [sample[0] for sample in liver_samples] | |
ss, liver_experiments = server.list_experiments("hg19", "DNA Methylation", liver_samples_ids, None, "ENCODE", user_key) | |
liver_experiments_id = [experiment[0] for experiment in liver_experiments] | |
(status, liver_experiments_info) = server.info(liver_experiments_id, user_key) | |
liver_experiments_names = [experiment["name"] for experiment in liver_experiments_info] | |
print liver_experiments_names | |
## RETRIEVE THE SUMMARY DATA | |
requests = [] | |
request_id_experiment = {} | |
for liver_experiment in liver_experiments_names: | |
print liver_experiment | |
(status, q_liver_data) = server.select_regions(liver_experiment, "hg19", None, None, None, None, None, None, None, user_key) | |
(s, r) = server.count_regions(q_liver_data, user_key) | |
(s, status) = server.info(r, user_key) | |
while (status[0]["state"] != "done"): | |
print status | |
time.sleep(1.0) | |
(s, status) = server.info(r, user_key) | |
print server.get_request_data(r, user_key) | |
(status, agg_id) = server.aggregate(q_liver_data, q_h1_hESC_H3K4me3_peaks, 'SCORE', user_key) | |
(res, q_filter) = server.filter_regions(agg_id, "@AGG.COUNT", ">", "0", "number", user_key) | |
print "Summarization of " + liver_experiment + " over " + h1_hESC_H3K4me3_experiment_name + " regions." | |
status, req = server.get_regions(q_filter, "CHROMOSOME,START,END,@AGG.MEAN,@AGG.COUNT,@AGG.MAX,@AGG.MIN", user_key) | |
print liver_experiment, status, req | |
request_id_experiment[req] = liver_experiment | |
requests.append(req) | |
requests_data = {} | |
print requests | |
print request_id_experiment | |
while len(requests) > 0: | |
for req in requests[:]: | |
(s, ss) = server.info(req, user_key) | |
if ss[0]["state"] == "done": | |
print ss[0] | |
print "getting data from " + ss[0]["_id"] | |
(s, data) = server.get_request_data(req, user_key) | |
with open("data/"+request_id_experiment[req]+".bed", 'wb') as f: | |
f.write(data) | |
requests.remove(req) | |
time.sleep(1.0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment