Last active
July 15, 2018 05:38
-
-
Save brendano/29d9dc619bd7e087b459e6027a52af89 to your computer and use it in GitHub Desktop.
example of using corenlp server from python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Example of using the CoreNLP server from Python.

This code requires the server to already be running:
https://stanfordnlp.github.io/CoreNLP/corenlp-server.html

To start the server:
    java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000

To call it, e.g.:
    curl --data "The man wanted to go to work." 'http://localhost:9000/?properties={%22annotators%22%3A%22tokenize%2Cssplit%2Cpos%2Cdepparse%22%2C%22outputFormat%22%3A%22conllu%22}'

Another trick: use a remote server over ssh.
1. On the server machine, run the CoreNLP server.
2. On the local machine, run a tunnel in its own terminal:
    ssh -NL 9000:localhost:9000 remotehost
Now localhost:9000 connects to the remote server at port 9000, so
"curl http://localhost:9000 ..." should work.
""" | |
import json
import sys

import requests
def goparse(text, props=None, url='http://localhost:9000/', timeout=None):
    """Send *text* to a running CoreNLP server and return the response body.

    Works on both Python 2 and Python 3.

    Args:
        text: The text to annotate. Unicode text is encoded as UTF-8 before
            it is sent; a byte string is sent unchanged.
        props: CoreNLP properties as a dict whose values are all strings.
            It must contain the key 'annotators'. When omitted,
            {'annotators': 'tokenize,ssplit'} is used.
        url: Address of the CoreNLP server to POST to.
        timeout: Passed straight to ``requests.post``; the default of None
            keeps the original behaviour of waiting without a time limit.

    Returns:
        The response body as text (``r.text``). The body is returned as-is,
        without checking the HTTP status; callers that requested JSON output
        can pass it to ``json.loads``.

    Raises:
        TypeError: If *text* is not a string, or a property value is not a
            string.
        ValueError: If *props* has no 'annotators' entry.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if props is None:
        props = {'annotators': 'tokenize,ssplit'}

    # Python 2 has a separate ``unicode`` text type; on Python 3 the name does
    # not exist and ``str`` is the text type.
    try:
        text_type = unicode  # noqa: F821
    except NameError:
        text_type = str

    if isinstance(text, text_type):
        text = text.encode("utf-8")
    # Explicit raises instead of ``assert``: asserts vanish under ``python -O``.
    if not isinstance(text, bytes):
        raise TypeError("text must be a string, got %s" % type(text).__name__)

    if 'annotators' not in props:
        raise ValueError("props must contain an 'annotators' entry")
    for key, value in props.items():
        if not isinstance(value, (str, text_type)):
            raise TypeError(
                "property %r must be a string, got %s"
                % (key, type(value).__name__)
            )

    r = requests.post(
        url,
        params={'properties': json.dumps(props)},
        data=text,
        timeout=timeout,
    )
    return r.text
# Other example invocations:
# print(goparse("Based on the book, the movie is great.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'}))
# print(goparse("I went there to go there.", {'annotators':'tokenize,ssplit,pos,depparse', 'outputFormat':'conllu'}))
# print(goparse(sys.argv[1], {
#     'annotators':'tokenize,ssplit,pos,parse',
#     'parse.model':'edu/stanford/nlp/models/srparser/englishSR.ser.gz',
#     'parse.originalDependencies': "true",
#     'outputFormat':'conllu',
# }))
def format_dependencies(dependencies):
    """Render dependency edges as ``governor:index -relation-> dependent:index``.

    Args:
        dependencies: Iterable of dicts, each with the keys 'governor',
            'governorGloss', 'dependent', 'dependentGloss' and 'dep'.

    Returns:
        A list of text lines, one per edge, ordered by
        (governor, dependent).
    """
    ordered = sorted(
        dependencies,
        key=lambda edge: (edge['governor'], edge['dependent']),
    )
    return [
        u"%s:%s -%s-> %s:%s" % (
            edge['governorGloss'], edge['governor'],
            edge['dep'],
            edge['dependentGloss'], edge['dependent'],
        )
        for edge in ordered
    ]


def main(argv=None):
    """Parse the text given on the command line and print its dependencies.

    Prints the server's JSON response (re-indented), then one line per edge
    of the first sentence's 'basicDependencies'.

    Args:
        argv: Argument list in ``sys.argv`` layout; defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when no text argument is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else "corenlp_client"
        sys.stderr.write("usage: %s TEXT\n" % prog)
        return 2

    response = goparse(argv[1], {
        'annotators': 'tokenize,ssplit,pos,depparse',
        'depparse.model': 'edu/stanford/nlp/models/parser/nndep/english_SD.gz',
    })
    parsed = json.loads(response)
    # indent=1 is what the original's indent=True evaluated to.
    print(json.dumps(parsed, indent=1))

    sentences = parsed.get('sentences') or []
    if not sentences:
        # Nothing to list (e.g. empty input text); avoid an IndexError.
        return 0

    for line in format_dependencies(sentences[0]['basicDependencies']):
        # Python 2's print may fail on non-ASCII unicode when stdout is not a
        # terminal, so give it UTF-8 bytes there, as the original script did.
        print(line.encode("utf8") if sys.version_info[0] == 2 else line)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment