# brendano/corenlp_client_example.py

## corenlp_client_example.py
"""
example of using corenlp server from python

This code requires server to already be running: https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
To start server:
java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000

To call it, e.g.:
curl --data "The man wanted to go to work." 'http://localhost:9000/?properties={%22annotators%22%3A%22tokenize%2Cssplit%2Cpos%2Cdepparse%22%2C%22outputFormat%22%3A%22conllu%22}'

other trick: use a remote server over ssh
1. on server machine, run the corenlp server
2. on local machine, run a tunnel in its own terminal:
    ssh -NL 9000:remotehost:9000 user@remotehost
now localhost:9000 connects to remote server at 9000.  "curl http://localhost:9000 ..." should work.
"""
import requests
import sys,json

def goparse(text, props=None, url='http://localhost:9000/', timeout=None):
    """Send ``text`` to a running CoreNLP server and return the response body.

    Works on both Python 2 and Python 3.

    Args:
        text: Document to annotate, either as text or as already-encoded
            bytes.  Text is encoded to UTF-8 before being posted.
        props: CoreNLP properties as a dict whose values are all strings;
            it must contain the key ``'annotators'``.  When omitted,
            ``{'annotators': 'tokenize,ssplit'}`` is used.
        url: Address of the CoreNLP server to post to.
        timeout: Passed straight through to ``requests.post``; ``None``
            (the default) applies no client-side timeout.

    Returns:
        The response body as text (``r.text``).  It is returned unparsed;
        a caller that wants a dict from a JSON response can pass it to
        ``json.loads`` (and fall back to ``{'sentences': []}`` on failure
        if a best-effort result is acceptable).

    Raises:
        TypeError: if ``text`` is neither text nor bytes, or if any value
            in ``props`` is not a string.
        ValueError: if ``props`` has no ``'annotators'`` entry.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3 -- avoids naming
    # ``unicode`` directly, which does not exist on Python 3.
    text_type = type(u'')

    if props is None:
        # Built per call so no dict is shared between invocations.
        props = {'annotators': 'tokenize,ssplit'}

    if isinstance(text, text_type):
        text = text.encode("utf-8")
    # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
    if not isinstance(text, bytes):
        raise TypeError("text must be text or bytes, got %s" % type(text).__name__)
    if 'annotators' not in props:
        raise ValueError("props must contain an 'annotators' entry")
    if not all(isinstance(v, (str, text_type)) for v in props.values()):
        raise TypeError("all values in props must be strings")

    # Properties travel JSON-encoded in the query string; the document is
    # the raw request body.
    r = requests.post(url, params={'properties': json.dumps(props)},
                      data=text, timeout=timeout)
    return r.text

# print goparse("Based on the book, the movie is great.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})

# print goparse("I went there to go there.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})

# print goparse(sys.argv[1], {
#     'annotators':'tokenize,ssplit,pos,parse',
#     'parse.model':'edu/stanford/nlp/models/srparser/englishSR.ser.gz',
#     'parse.originalDependencies': "true",
#     'outputFormat':'conllu',
# })


# Command-line demo: parse the text given as the first argument, dump the
# server's JSON response, then list the first sentence's basic dependencies.
if len(sys.argv) < 2:
    # Fail with a usage line instead of a bare IndexError traceback.
    sys.exit("usage: %s TEXT" % sys.argv[0])

p = goparse(sys.argv[1], {
    'annotators':'tokenize,ssplit,pos,depparse',
    'depparse.model':'edu/stanford/nlp/models/parser/nndep/english_SD.gz',
    })
p = json.loads(p)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(json.dumps(p, indent=True))

# Only the first sentence is listed; an empty document yields no sentences,
# in which case there is nothing to list.
sentences = p.get('sentences', [])
first_deps = sentences[0]['basicDependencies'] if sentences else []

# One "governor:index -relation-> dependent:index" line per edge, ordered by
# governor index and then dependent index.
for d in sorted(first_deps, key=lambda d: (d['governor'], d['dependent'])):
    line = u"%s:%s -%s-> %s:%s" % (
        d['governorGloss'], d['governor'],
        d['dep'],
        d['dependentGloss'], d['dependent'],
        )
    if sys.version_info[0] < 3:
        # Python 2 cannot reliably print non-ASCII unicode to a pipe, so emit
        # UTF-8 bytes there; Python 3 prints text directly.
        line = line.encode("utf8")
    print(line)
	"""
	example of using corenlp server from python

	This code requires server to already be running: https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
	To start server:
	java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000

	To call it, e.g.:
	curl --data "The man wanted to go to work." 'http://localhost:9000/?properties={%22annotators%22%3A%22tokenize%2Cssplit%2Cpos%2Cdepparse%22%2C%22outputFormat%22%3A%22conllu%22}'

	other trick: use a remote server over ssh
	1. on server machine, run the corenlp server
	2. on local machine, run a tunnel in its own terminal:
	ssh -NL 9000:remotehost:9000 user@remotehost
	now localhost:9000 connects to remote server at 9000. "curl http://localhost:9000 ..." should work.
	"""
	import requests
	import sys,json

	def goparse(text, props=None, url='http://localhost:9000/', timeout=None):
		"""Send ``text`` to a running CoreNLP server and return the response body.

		Works on both Python 2 and Python 3.

		Args:
			text: Document to annotate, either as text or as already-encoded
				bytes.  Text is encoded to UTF-8 before being posted.
			props: CoreNLP properties as a dict whose values are all strings;
				it must contain the key ``'annotators'``.  When omitted,
				``{'annotators': 'tokenize,ssplit'}`` is used.
			url: Address of the CoreNLP server to post to.
			timeout: Passed straight through to ``requests.post``; ``None``
				(the default) applies no client-side timeout.

		Returns:
			The response body as text (``r.text``).  It is returned unparsed;
			a caller that wants a dict from a JSON response can pass it to
			``json.loads`` (and fall back to ``{'sentences': []}`` on failure
			if a best-effort result is acceptable).

		Raises:
			TypeError: if ``text`` is neither text nor bytes, or if any value
				in ``props`` is not a string.
			ValueError: if ``props`` has no ``'annotators'`` entry.
		"""
		# ``unicode`` on Python 2, ``str`` on Python 3 -- avoids naming
		# ``unicode`` directly, which does not exist on Python 3.
		text_type = type(u'')

		if props is None:
			# Built per call so no dict is shared between invocations.
			props = {'annotators': 'tokenize,ssplit'}

		if isinstance(text, text_type):
			text = text.encode("utf-8")
		# Explicit raises instead of ``assert`` so the checks survive ``python -O``.
		if not isinstance(text, bytes):
			raise TypeError("text must be text or bytes, got %s" % type(text).__name__)
		if 'annotators' not in props:
			raise ValueError("props must contain an 'annotators' entry")
		if not all(isinstance(v, (str, text_type)) for v in props.values()):
			raise TypeError("all values in props must be strings")

		# Properties travel JSON-encoded in the query string; the document is
		# the raw request body.
		r = requests.post(url, params={'properties': json.dumps(props)},
			data=text, timeout=timeout)
		return r.text

	# print goparse("Based on the book, the movie is great.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})

	# print goparse("I went there to go there.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})

	# print goparse(sys.argv[1], {
	# 'annotators':'tokenize,ssplit,pos,parse',
	# 'parse.model':'edu/stanford/nlp/models/srparser/englishSR.ser.gz',
	# 'parse.originalDependencies': "true",
	# 'outputFormat':'conllu',
	# })


	# Command-line demo: parse the text given as the first argument, dump the
	# server's JSON response, then list the first sentence's basic dependencies.
	if len(sys.argv) < 2:
		# Fail with a usage line instead of a bare IndexError traceback.
		sys.exit("usage: %s TEXT" % sys.argv[0])

	p = goparse(sys.argv[1], {
		'annotators':'tokenize,ssplit,pos,depparse',
		'depparse.model':'edu/stanford/nlp/models/parser/nndep/english_SD.gz',
		})
	p = json.loads(p)
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(json.dumps(p, indent=True))

	# Only the first sentence is listed; an empty document yields no sentences,
	# in which case there is nothing to list.
	sentences = p.get('sentences', [])
	first_deps = sentences[0]['basicDependencies'] if sentences else []

	# One "governor:index -relation-> dependent:index" line per edge, ordered by
	# governor index and then dependent index.
	for d in sorted(first_deps, key=lambda d: (d['governor'], d['dependent'])):
		line = u"%s:%s -%s-> %s:%s" % (
			d['governorGloss'], d['governor'],
			d['dep'],
			d['dependentGloss'], d['dependent'],
			)
		if sys.version_info[0] < 3:
			# Python 2 cannot reliably print non-ASCII unicode to a pipe, so emit
			# UTF-8 bytes there; Python 3 prints text directly.
			line = line.encode("utf8")
		print(line)