leebird/mirna_ner_api.py

## mirna_ner_api.py
from __future__ import print_function, unicode_literals
import json
import urllib
import urllib2

# Endpoint of the miRTex miRNA named-entity-recognition service.
api_url = 'http://research.bioinformatics.udel.edu/miRTex/ner'

# The documents to be processed, keyed by a caller-chosen document id.
documents = {
    'doc1': 'Endogenous KLF4 bound to the promoter regions and promoted the expression of two microRNAs (miRs), miR-206 and miR-21 (i.e., miR-206/21).',
    'doc2': 'We determined the hub CDK6, BCL2, E2F3, PTEN, MYC, RB, and ERBB3 target genes and hub hsa-let-7c, hsa-miR-195-5p, hsa-miR-141-3p, hsa-miR-26a-5p, hsa-miR-23b-3p, and hsa-miR-125b-5p miRNAs of the constructed networks.',
    'doc3': 'MicroRNAs 206 and 21 cooperate to promote RAS-extracellular signal-regulated kinase signaling by suppressing the translation of RASA1 and SPRED1.',
}

# Dump the documents to a json string.
json_string = json.dumps(documents)

# The json string is sent as the value of the form field 'request'.
values = {'request': json_string}
data = urllib.urlencode(values)

# Send request and receive response. Supplying a data payload makes urllib2
# issue the request as a POST.
req = urllib2.Request(api_url, data)
response = urllib2.urlopen(req)
try:
    json_return = response.read()
finally:
    # Always release the underlying connection, even if reading fails.
    response.close()

# Load result from the returned json string.
result = json.loads(json_return)

# Print result, one tab-separated line per recognized mention.
# Result is a hash table of <doc_id, mentions>. Each doc_id is mapped to a
# list of mentions, and each mention is a 4-element tuple:
# <miRNA name, begin offset, end offset, miRNA numbers>.
# The begin offset is the mention's first character offset in the original
# text, while the end offset is the mention's last character offset in the
# original text + 1 (i.e. an exclusive end).
# The miRNA numbers element is itself a list, containing the number suffixes
# for the recognized miRNA. For example, for the name 'miR-206/21', the numbers
# are 206 and 21; they are joined with '|' for printing.
for doc_id, mentions in result.items():
    # Unpack each 4-element mention directly; 'mention' holds the miRNA name.
    for mention, begin, end, numbers in mentions:
        print(doc_id, mention, begin, end, '|'.join(numbers), sep='\t')

'''
Printed results:
doc2	hsa-miR-195-5p	98	112	195-5p
doc2	hsa-miR-141-3p	114	128	141-3p
doc2	hsa-miR-26a-5p	130	144	26a-5p
doc2	hsa-miR-23b-3p	146	160	23b-3p
doc2	hsa-miR-125b-5p	166	181	125b-5p
doc2	hsa-let-7c	86	96	let7c
doc3	MicroRNAs 206 and 21	0	20	206|21
doc1	miR-206	99	106	206
doc1	miR-21	111	117	21
doc1	miR-206/21	125	135	206|21
'''
	from __future__ import print_function, unicode_literals
	import json
	import urllib
	import urllib2

	# Endpoint of the miRTex miRNA named-entity-recognition service.
	api_url = 'http://research.bioinformatics.udel.edu/miRTex/ner'

	# The documents to be processed, keyed by a caller-chosen document id.
	documents = {
	    'doc1': 'Endogenous KLF4 bound to the promoter regions and promoted the expression of two microRNAs (miRs), miR-206 and miR-21 (i.e., miR-206/21).',
	    'doc2': 'We determined the hub CDK6, BCL2, E2F3, PTEN, MYC, RB, and ERBB3 target genes and hub hsa-let-7c, hsa-miR-195-5p, hsa-miR-141-3p, hsa-miR-26a-5p, hsa-miR-23b-3p, and hsa-miR-125b-5p miRNAs of the constructed networks.',
	    'doc3': 'MicroRNAs 206 and 21 cooperate to promote RAS-extracellular signal-regulated kinase signaling by suppressing the translation of RASA1 and SPRED1.',
	}

	# Dump the documents to a json string.
	json_string = json.dumps(documents)

	# The json string is sent as the value of the form field 'request'.
	values = {'request': json_string}
	data = urllib.urlencode(values)

	# Send request and receive response. Supplying a data payload makes urllib2
	# issue the request as a POST.
	req = urllib2.Request(api_url, data)
	response = urllib2.urlopen(req)
	try:
	    json_return = response.read()
	finally:
	    # Always release the underlying connection, even if reading fails.
	    response.close()

	# Load result from the returned json string.
	result = json.loads(json_return)

	# Print result, one tab-separated line per recognized mention.
	# Result is a hash table of <doc_id, mentions>. Each doc_id is mapped to a
	# list of mentions, and each mention is a 4-element tuple:
	# <miRNA name, begin offset, end offset, miRNA numbers>.
	# The begin offset is the mention's first character offset in the original
	# text, while the end offset is the mention's last character offset in the
	# original text + 1 (i.e. an exclusive end).
	# The miRNA numbers element is itself a list, containing the number suffixes
	# for the recognized miRNA. For example, for the name 'miR-206/21', the numbers
	# are 206 and 21; they are joined with '|' for printing.
	for doc_id, mentions in result.items():
	    # Unpack each 4-element mention directly; 'mention' holds the miRNA name.
	    for mention, begin, end, numbers in mentions:
	        print(doc_id, mention, begin, end, '|'.join(numbers), sep='\t')

	'''
	Printed results:
	doc2	hsa-miR-195-5p	98	112	195-5p
	doc2	hsa-miR-141-3p	114	128	141-3p
	doc2	hsa-miR-26a-5p	130	144	26a-5p
	doc2	hsa-miR-23b-3p	146	160	23b-3p
	doc2	hsa-miR-125b-5p	166	181	125b-5p
	doc2	hsa-let-7c	86	96	let7c
	doc3	MicroRNAs 206 and 21	0	20	206|21
	doc1	miR-206	99	106	206
	doc1	miR-21	111	117	21
	doc1	miR-206/21	125	135	206|21
	'''