# Skip to content
#
# Instantly share code, notes, and snippets.
#
# Embed
# What would you like to do?
# example of using corenlp server from python
"""
example of using corenlp server from python
This code requires server to already be running: https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
To start server:
java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
To call it, e.g.:
curl --data "The man wanted to go to work." 'http://localhost:9000/?properties={%22annotators%22%3A%22tokenize%2Cssplit%2Cpos%2Cdepparse%22%2C%22outputFormat%22%3A%22conllu%22}'
Other trick: use a remote server over ssh.
1. On the server machine, run the CoreNLP server.
2. On the local machine, run a tunnel in its own terminal:
ssh -NL 9000:remotehost:9000 user@remotehost
Now localhost:9000 connects to the remote server at port 9000. "curl http://localhost:9000 ..." should work.
"""
import requests
import sys,json
def goparse(text, props=None, url='http://localhost:9000/', timeout=None):
    """Send *text* to a running CoreNLP server and return the response text.

    Works on both Python 2 and Python 3.

    Args:
        text: The text to annotate, as a unicode string (encoded to UTF-8
            before sending) or as an already-encoded byte string.
        props: Dict of CoreNLP properties. It must contain an
            ``'annotators'`` entry and every value must be a string.
            Defaults to ``{'annotators': 'tokenize,ssplit'}``.
        url: Address the request is POSTed to.
        timeout: Passed straight through to ``requests.post``; ``None``
            means no timeout is applied.

    Returns:
        ``r.text`` of the HTTP response. The status code is not checked, so
        whatever body the server sent back is returned as-is.

    Raises:
        AssertionError: If *text* is not a string, *props* has no
            ``'annotators'`` key, or a *props* value is not a string.
    """
    # Build the default per call rather than sharing one dict object
    # between all calls through the signature.
    if props is None:
        props = {'annotators': 'tokenize,ssplit'}

    # type(u"") is `unicode` on Python 2 and `str` on Python 3, so this
    # avoids referencing the Python 2-only name `unicode` directly.
    text_type = type(u"")
    if isinstance(text, text_type):
        text = text.encode("utf-8")
    # `bytes` is an alias of `str` on Python 2.
    assert isinstance(text, bytes), "text must be a unicode or byte string"
    assert 'annotators' in props, "props must contain an 'annotators' entry"
    assert all(isinstance(v, (str, text_type)) for v in props.values()), \
        "every props value must be a string"

    # The properties travel JSON-encoded in the query string; the text
    # itself is the request body.
    r = requests.post(
        url,
        params={'properties': json.dumps(props)},
        data=text,
        timeout=timeout,
    )
    return r.text
# try:
# return json.loads(r.text)
# except:
# return {'sentences':[]}
# print goparse("Based on the book, the movie is great.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})
# print goparse("I went there to go there.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})
# print goparse(sys.argv[1], {
# 'annotators':'tokenize,ssplit,pos,parse',
# 'parse.model':'edu/stanford/nlp/models/srparser/englishSR.ser.gz',
# 'parse.originalDependencies': "true",
# 'outputFormat':'conllu',
# })
def _main():
    """Parse the text given as the first command-line argument and print it.

    Prints the full JSON response from the server, then one line per basic
    dependency of the first sentence in the form
    ``governorGloss:governor -dep-> dependentGloss:dependent``, ordered by
    (governor, dependent).
    """
    if len(sys.argv) < 2:
        sys.exit("usage: python %s 'text to parse'" % sys.argv[0])

    response = goparse(sys.argv[1], {
        'annotators': 'tokenize,ssplit,pos,depparse',
        'depparse.model': 'edu/stanford/nlp/models/parser/nndep/english_SD.gz',
    })
    p = json.loads(response)
    # indent=1 produces the same output as the former indent=True
    # (True is the integer 1), just spelled explicitly.
    print(json.dumps(p, indent=1))

    sentences = p['sentences']
    if not sentences:
        # Nothing was parsed, so there are no dependencies to list.
        return

    # Only the first sentence's dependencies are listed.
    deps = sorted(
        sentences[0]['basicDependencies'],
        key=lambda d: (d['governor'], d['dependent']),
    )
    for d in deps:
        line = u"%s -%s-> %s" % (
            u"%s:%s" % (d['governorGloss'], d['governor']),
            d['dep'],
            u"%s:%s" % (d['dependentGloss'], d['dependent']),
        )
        # Python 2 needs explicit bytes to print non-ASCII text reliably;
        # Python 3 prints text directly (bytes would show up as b'...').
        if sys.version_info[0] < 3:
            line = line.encode("utf8")
        print(line)


if __name__ == "__main__":
    _main()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
# You can't perform that action at this time.