"""
Example of using the CoreNLP server from Python.

This code requires the server to already be running.

To start the server:
    java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000

To call it, e.g.:
    curl --data "The man wanted to go to work." 'http://localhost:9000/?properties={%22annotators%22%3A%22tokenize%2Cssplit%2Cpos%2Cdepparse%22%2C%22outputFormat%22%3A%22conllu%22}'

Another trick: use a remote server over ssh.
1. On the server machine, run the CoreNLP server.
2. On the local machine, run a tunnel in its own terminal
   (followed by the ssh destination, e.g. user@gateway):
    ssh -NL 9000:remotehost:9000
Now localhost:9000 connects to the remote server at port 9000. "curl http://localhost:9000 ..." should work.
"""
import requests
import sys,json
def goparse(text, props=None):
    """Annotate ``text`` with a running CoreNLP server and return the raw reply.

    The text is POSTed to ``http://localhost:9000/``, so a server must already
    be listening there (directly or through an ssh tunnel).

    Args:
        text: The text to annotate. Text strings are encoded to UTF-8 before
            being sent; byte strings are sent unchanged.
        props: CoreNLP properties, sent JSON-encoded in the ``properties``
            query parameter. Must contain ``'annotators'`` and every value
            must be a string. Defaults to
            ``{'annotators': 'tokenize,ssplit'}``.

    Returns:
        The response body as text, in whatever output format the server
        produced for the given properties.

    Raises:
        AssertionError: If ``text`` is not a string, ``props`` lacks
            ``'annotators'``, or a property value is not a string.
    """
    # Build the default per call instead of sharing one mutable dict.
    if props is None:
        props = {'annotators': 'tokenize,ssplit'}

    # ``unicode`` only exists on Python 2; on Python 3 ``str`` is the text type.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    # Send bytes on the wire: encode text explicitly as UTF-8 so non-ASCII
    # input does not depend on the HTTP library's default encoding.
    if not isinstance(text, bytes):
        assert isinstance(text, string_types), "text must be a string"
        text = text.encode("utf-8")

    assert 'annotators' in props
    assert all(isinstance(v, string_types) for v in props.values())

    r = requests.post(
        'http://localhost:9000/',
        params={'properties': json.dumps(props)},
        data=text,
    )
    return r.text
# try:
# return json.loads(r.text)
# except:
# return {'sentences':[]}
# print goparse("Based on the book, the movie is great.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})
# print goparse("I went there to go there.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})
# print goparse(sys.argv[1], {
# 'annotators':'tokenize,ssplit,pos,parse',
# 'parse.model':'edu/stanford/nlp/models/srparser/englishSR.ser.gz',
# 'parse.originalDependencies': "true",
# 'outputFormat':'conllu',
# })
# Parse the sentence given as the first command-line argument and dump the
# server's JSON reply.
# NOTE(review): the properties dict was missing from this call (the statement
# was left unclosed). The values below are reconstructed from what the code
# after it reads -- JSON output containing 'basicDependencies' -- confirm.
p = goparse(sys.argv[1], {
    'annotators': 'tokenize,ssplit,pos,depparse',
    'outputFormat': 'json',
})
p = json.loads(p)
# Call form of print gives the same output on Python 2 and Python 3.
print(json.dumps(p, indent=True))
for d in sorted(p['sentences'][0]['basicDependencies'], key=lambda d: (d['governor'],d['dependent'])):
print (u"%s -%s-> %s" % (
d['governorGloss']+":%s" % d['governor'],
d['dependentGloss']+":%s" % d['dependent'],
