Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Summarizing methylation level by CpG Island of the monocyte cells
# Summarizing methylation level by CpG Island of the monocyte cells
import xmlrpclib
import time
# Before going further, we must set up the client:
url = "http://deepblue.mpi-inf.mpg.de/xmlrpc"
# You can use the anonymous key or your own user key
user_key = "anonymous_key"


def ensure_okay(response, action):
    """Return the payload of a ``(status, payload)`` server response.

    Every server call in this script answers with a two-element sequence
    whose first item is a status and whose second item is the data.

    NOTE(review): assumes failures are reported with the status string
    "error" and the error message as payload - confirm against the
    DeepBlue API documentation.

    :param response: the two-element sequence returned by a server call.
    :param action: short description of the call, used in the error message.
    :raises RuntimeError: if the status is "error".
    """
    status, payload = response
    if status == "error":
        raise RuntimeError("DeepBlue could not %s: %s" % (action, payload))
    return payload


def is_cnag_methylation_call(info):
    """Tell whether an experiment info record is a BS_METH_CALL_CNAG signal.

    Records lacking "data_type", "extra_metadata" or "FILE_TYPE" are simply
    rejected instead of raising KeyError.
    """
    extra_metadata = info.get("extra_metadata") or {}
    return (info.get("data_type") == "signal"
            and extra_metadata.get("FILE_TYPE") == "BS_METH_CALL_CNAG")


def wait_for_request(server, request_id, user_key, poll_interval=1):
    """Poll ``server.info`` until the request is "done" or "failed".

    :param poll_interval: seconds to sleep between two polls.
    :return: the final state string ("done" or "failed").
    """
    while True:
        info = ensure_okay(server.info(request_id, user_key),
                           "read the state of request %s" % request_id)
        state = info[0]["state"]
        if state in ("done", "failed"):
            return state
        time.sleep(poll_interval)


def fetch_request_data(server, request_id, user_key, poll_interval=1):
    """Wait for the server to process a request and return its data.

    :raises RuntimeError: if the request ends in the "failed" state or its
        data cannot be retrieved.
    """
    state = wait_for_request(server, request_id, user_key, poll_interval)
    if state == "failed":
        raise RuntimeError("DeepBlue request %s failed" % request_id)
    return ensure_okay(server.get_request_data(request_id, user_key),
                       "retrieve the data of request %s" % request_id)


def main():
    """Summarize the methylation level by CpG island of the monocyte cells."""
    server = xmlrpclib.Server(url, allow_none=True)

    # Select the biosources related to monocyte
    monocytes_biosources = ensure_okay(
        server.get_biosource_children("monocyte", user_key),
        "list the monocyte biosources")
    # Obtain the biosources names
    monocytes_biosources_names = ensure_okay(
        server.extract_names(monocytes_biosources),
        "extract the biosource names")

    # Obtain the samples from BLUEPRINT
    # NOTE(review): the project is spelled "BLUEPRINT Epigenome" here and
    # "Blueprint Epigenome" in list_experiments below - confirm that the
    # server matches these values case-insensitively.
    samples = ensure_okay(
        server.list_samples(monocytes_biosources_names,
                            {"source": "BLUEPRINT Epigenome"}, user_key),
        "list the samples")
    # Obtain the samples IDs
    samples_ids = ensure_okay(server.extract_ids(samples),
                              "extract the sample IDs")

    # Obtain the experiments
    experiments = ensure_okay(
        server.list_experiments("", "", "DNA Methylation", "", samples_ids,
                                "Bisulfite-Seq", "Blueprint Epigenome",
                                user_key),
        "list the experiments")

    # Filter the experiments that are BS_METH_CALL_CNAG
    selected_experiments = []
    for experiment in experiments:
        # experiment[0] is the ID of the experiment.
        info = ensure_okay(server.info(experiment[0], user_key),
                           "read the info of experiment %s" % experiment[0])
        if is_cnag_methylation_call(info[0]):
            selected_experiments.append(experiment)
    if not selected_experiments:
        # Nothing to select regions from or to aggregate: stop here instead
        # of sending empty experiment lists to the server.
        raise RuntimeError("No BS_METH_CALL_CNAG signal experiment was found")
    experiment_names = ensure_okay(
        server.extract_names(selected_experiments),
        "extract the experiment names")

    # Select the regions of the chromosome 1
    query_id = ensure_okay(
        server.select_regions(experiment_names, None, None, None, None, None,
                              "chr1", None, None, user_key),
        "select the regions")
    # Count how many regions were selected
    request_id = ensure_okay(server.count_regions(query_id, user_key),
                             "request the region count")

    # You can see your requests using list_requests:
    server.list_requests("", user_key)
    # Or obtain information about a specific request with 'info'
    server.info(request_id, user_key)

    # Wait for the server processing
    count = fetch_request_data(server, request_id, user_key)
    print("We selected %s regions" % (count,))

    #
    # Now, we will use the command score_matrix to obtain the regions
    # aggregation scores.
    #
    # We will perform the aggregation using the CpG Islands regions
    annotation_key = ensure_okay(
        server.select_annotations("Cpg Islands", "GRCh38", None, None, None,
                                  user_key),
        "select the CpG Islands annotation")

    # The aggregation will be made on the columns named VALUE.
    experiments_columns = {name: "VALUE" for name in experiment_names}

    # The score_matrix has the following parameters:
    # experiments_and_columns, aggregation_function, regions_query_id, user_key
    request_id = ensure_okay(
        server.score_matrix(experiments_columns, "mean", annotation_key,
                            user_key),
        "request the score matrix")

    # Wait for the server processing, then obtain the score matrix data
    # and print it
    data = fetch_request_data(server, request_id, user_key)
    print(data)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment