Skip to content

Instantly share code, notes, and snippets.

@felipealbrecht
Last active December 6, 2016 10:28
Show Gist options
  • Save felipealbrecht/bca3de38a5d3e1cef7ae to your computer and use it in GitHub Desktop.
Save felipealbrecht/bca3de38a5d3e1cef7ae to your computer and use it in GitHub Desktop.
Aggregate the methylation data signal of the liver data experiments by the H-ESC H3K4me3 peaks
## Aggregate the methylation data signal of the liver data experiments by the H-ESC H3K4me3 peaks
## Felipe Albrecht -
import xmlrpclib
import time
user_key = 'USER_KEY'
url = "http://deepblue.mpi-inf.mpg.de/xmlrpc"
server = xmlrpclib.Server(url, encoding='UTF-8', allow_none=True)
print server.echo(user_key)
## OBTAIN THE ENCODE H1-hESC SAMPLE
(status, H1_hESC_samples) = server.list_samples("H1-hESC", {"source":"ENCODE"}, user_key)
H1_hESC_samples_ids = [sample[0] for sample in H1_hESC_samples]
## GET ALL EXPERIMENTS WITH EPIGENETIC MARK H3K4ME3
(status, experiments) = server.list_experiments("hg19", "H3K4me3", H1_hESC_samples_ids, None, "ENCODE", user_key)
## GET THE EXPERIMENT NAME
experiments_id = [experiment[0] for experiment in experiments]
(status, experiments_info) = server.info(experiments_id, user_key)
h1_hESC_H3K4me3_experiment_name = [experiment["name"] for experiment in experiments_info if experiment["data_type"] == "peaks" and experiment["extra_metadata"]['original_file_url'].endswith("bed.gz")]
if len(h1_hESC_H3K4me3_experiment_name) != 1:
print "It was expected one h1_hESC_H3K4me3 experiment"
h1_hESC_H3K4me3_experiment_name = h1_hESC_H3K4me3_experiment_name[0]
## SELECT THE DATA FROM THE GIVEN EXPERIMENT
(status, q_h1_hESC_H3K4me3_peaks) = server.select_regions(h1_hESC_H3K4me3_experiment_name, None, None, None, None, None, None, None, None, user_key)
print q_h1_hESC_H3K4me3_peaks
(s, r) = server.count_regions(q_h1_hESC_H3K4me3_peaks, user_key)
(s, status) = server.info(r, user_key)
print status
while (status[0]["state"] != "done"):
time.sleep(1.0)
(s, status) = server.info(r, user_key)
print server.get_request_data(r, user_key)
## SELECT THE LIVER EXPERIMENTS
(status, biosources) = server.get_biosource_children('liver', user_key)
liver_biosource_names = [biosource[1] for biosource in biosources]
status, liver_samples = server.list_samples(liver_biosource_names, {"source": "ENCODE"}, user_key)
liver_samples_ids = [sample[0] for sample in liver_samples]
ss, liver_experiments = server.list_experiments("hg19", "DNA Methylation", liver_samples_ids, None, "ENCODE", user_key)
liver_experiments_id = [experiment[0] for experiment in liver_experiments]
(status, liver_experiments_info) = server.info(liver_experiments_id, user_key)
liver_experiments_names = [experiment["name"] for experiment in liver_experiments_info]
print liver_experiments_names
## RETRIEVE THE SUMMARY DATA
requests = []
request_id_experiment = {}
for liver_experiment in liver_experiments_names:
print liver_experiment
(status, q_liver_data) = server.select_regions(liver_experiment, "hg19", None, None, None, None, None, None, None, user_key)
(s, r) = server.count_regions(q_liver_data, user_key)
(s, status) = server.info(r, user_key)
while (status[0]["state"] != "done"):
print status
time.sleep(1.0)
(s, status) = server.info(r, user_key)
print server.get_request_data(r, user_key)
(status, agg_id) = server.aggregate(q_liver_data, q_h1_hESC_H3K4me3_peaks, 'SCORE', user_key)
(res, q_filter) = server.filter_regions(agg_id, "@AGG.COUNT", ">", "0", "number", user_key)
print "Summarization of " + liver_experiment + " over " + h1_hESC_H3K4me3_experiment_name + " regions."
status, req = server.get_regions(q_filter, "CHROMOSOME,START,END,@AGG.MEAN,@AGG.COUNT,@AGG.MAX,@AGG.MIN", user_key)
print liver_experiment, status, req
request_id_experiment[req] = liver_experiment
requests.append(req)
requests_data = {}
print requests
print request_id_experiment
while len(requests) > 0:
for req in requests[:]:
(s, ss) = server.info(req, user_key)
if ss[0]["state"] == "done":
print ss[0]
print "getting data from " + ss[0]["_id"]
(s, data) = server.get_request_data(req, user_key)
with open("data/"+request_id_experiment[req]+".bed", 'wb') as f:
f.write(data)
requests.remove(req)
time.sleep(1.0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment