"""
example of using corenlp server from python
This code requires server to already be running: https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
To start server:
java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000
To call it, e.g.:
curl --data "The man wanted to go to work." 'http://localhost:9000/?properties={%22annotators%22%3A%22tokenize%2Cssplit%2Cpos%2Cdepparse%22%2C%22outputFormat%22%3A%22conllu%22}'
other trick: use a remote server over ssh
1. on server machine, run the corenlp server
2. on local machine, run a tunnel in its own terminal:
ssh -NL 9000:remotehost:9000
now localhost:9000 connects to remote server at 9000. "curl http://localhost:9000 ..." should work.
"""
import json
import sys

import requests
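
# Before parsing, it can help to confirm the server is reachable. A minimal
# liveness check -- a sketch that assumes the default localhost:9000 and
# treats any HTTP response at all as "up":
def server_is_up(url='http://localhost:9000/', timeout=2):
    """Return True if something answers HTTP at the CoreNLP server URL."""
    try:
        requests.get(url, timeout=timeout)
        return True
    except requests.exceptions.RequestException:
        return False
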
def goparse(text, props=None):
    """POST text to a running CoreNLP server and return the raw response body."""
    if props is None:
        props = {'annotators': 'tokenize,ssplit'}
    # Send the body as UTF-8 bytes so non-ASCII text survives the trip.
    if isinstance(text, str):
        text = text.encode("utf-8")
    assert isinstance(text, bytes)
    assert 'annotators' in props
    assert all(isinstance(v, str) for v in props.values())
    # Annotation properties travel in the query string as a JSON object;
    # with no outputFormat set, the server defaults to JSON output.
    r = requests.post('http://localhost:9000/',
                      params={'properties': json.dumps(props)},
                      data=text)
    return r.text
# Alternative: return parsed JSON instead of raw text:
# try:
#     return json.loads(r.text)
# except ValueError:
#     return {'sentences': []}
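
# A concrete version of that fallback idea -- a sketch that returns parsed
# JSON, or an empty parse if the server sent something that isn't JSON
# (e.g. conllu output, or an error message):
def goparse_json(text, props=None):
    try:
        return json.loads(goparse(text, props))
    except ValueError:
        return {'sentences': []}
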
# print goparse("Based on the book, the movie is great.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})
# print goparse("I went there to go there.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'})
# print goparse(sys.argv[1], {
# 'annotators':'tokenize,ssplit,pos,parse',
# 'parse.model':'edu/stanford/nlp/models/srparser/englishSR.ser.gz',
# 'parse.originalDependencies': "true",
# 'outputFormat':'conllu',
# })
# Parse the first command-line argument with the neural dependency parser
# (Stanford Dependencies model), then pretty-print the JSON response.
p = goparse(sys.argv[1], {
    'annotators': 'tokenize,ssplit,pos,depparse',
    'depparse.model': 'edu/stanford/nlp/models/parser/nndep/english_SD.gz',
})
p = json.loads(p)
print(json.dumps(p, indent=2))

# Print the basic dependencies of the first sentence, one edge per line,
# ordered by (governor, dependent) token index.
for d in sorted(p['sentences'][0]['basicDependencies'],
                key=lambda d: (d['governor'], d['dependent'])):
    print("%s:%s -%s-> %s:%s" % (
        d['governorGloss'], d['governor'],
        d['dep'],
        d['dependentGloss'], d['dependent'],
    ))