Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save felipealbrecht/f81c14d2a52e9543567c to your computer and use it in GitHub Desktop.
Save felipealbrecht/f81c14d2a52e9543567c to your computer and use it in GitHub Desktop.
Summarizing DNA methylation levels in liver tissue across H3K4me3 peaks regions derived from human liver cells
import xmlrpclib
import time
import os.path
user_key = 'anonymous_key'
url = "http://deepblue.mpi-inf.mpg.de/xmlrpc"
server = xmlrpclib.Server(url, encoding='UTF-8', allow_none=True)
print server.echo(user_key)
## List the H1-hESC samples from ENCODE
(status, H1_hESC_samples) = server.list_samples("H1-hESC",
{"source":"ENCODE"}, user_key)
H1_hESC_samples_ids = server.extract_ids(H1_hESC_samples)[1]
## List the peaks experiments from H3K27ac with the H1-hESC samples
(status, experiments) = server.list_experiments("hg19", "peaks", "H3K4me3", "",
H1_hESC_samples_ids, None,
"ENCODE", user_key)
## Find the experiment that come from a bed file
experiments_id = server.extract_ids(experiments)[1]
(status, exps_infos) = server.info(experiments_id, user_key)
h1_hESC_H3K4me3_experiment_name = [e["name"] for e in
exps_infos if e["extra_metadata"]['original_file_url'].endswith("bed.gz")]
if len(h1_hESC_H3K4me3_experiment_name) != 1:
print "It was expected only one h1_hESC_H3K4me3 experiment"
h1_hESC_H3K4me3_exp = h1_hESC_H3K4me3_experiment_name[0]
## Obtain the regions from the H1-hESC H3k4me3 peaks
(status, h1_hESC_H3K4me3) = server.select_experiments(h1_hESC_H3K4me3_exp,
None, None, None,
user_key)
## List the liver and hepatocyte experiments
ss, liver_experiments = server.list_experiments("hg19", "",
"DNA Methylation",
'liver',
None, "RRBS", "ENCODE", user_key)
liver_experiments_names = server.extract_names(liver_experiments)[1]
print liver_experiments_names
## RETRIEVE THE SUMMARY DATA
requests = []
request_id_experiment = {}
## For each liver or hepatocyte experiment
for liver_experiment in liver_experiments_names:
print "Processing", liver_experiment
# Select the experiment regions
(status, q_liver_data) = server.select_experiments(liver_experiment,
None, None, None,
user_key)
# Perform aggregation
(status, agg_id) = server.aggregate(q_liver_data, h1_hESC_H3K4me3,
'SCORE', user_key)
# Filter the aggregate regions that summarized at least 1 region
(res, q_filter) = server.filter_regions(agg_id, "@AGG.COUNT", ">", "0",
"number", user_key)
# Request regions
status, req = server.get_regions(q_filter,
"CHROMOSOME,START,END,@AGG.MEAN,@AGG.COUNT,@AGG.MAX,@AGG.MIN", user_key)
print liver_experiment, status, req
request_id_experiment[req] = liver_experiment
requests.append(req)
print requests
print request_id_experiment
if not os.path.isdir("data/"):
os.mkdir("data/")
# Here we are waiting for the requests be completed
# We store the results in the files and print the request info in case of error
while len(requests) > 0:
for req in requests[:]:
(s, ss) = server.info(req, user_key)
if ss[0]["state"] == "done":
print ss[0]
print "getting data from " + ss[0]["_id"]
(s, data) = server.get_request_data(req, user_key)
with open("data/"+request_id_experiment[req]+".bed", 'wb') as f:
f.write(data)
requests.remove(req)
if ss[0]["state"] == "failed":
print ss
requests.remove(req)
time.sleep(1.0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment