Skip to content

Instantly share code, notes, and snippets.

@erickpeirson
Last active August 29, 2015 14:21
Show Gist options
  • Save erickpeirson/61eaf3b467947975ab46 to your computer and use it in GitHub Desktop.
Save erickpeirson/61eaf3b467947975ab46 to your computer and use it in GitHub Desktop.
Extractor for OpenIE running as a web socket (using websocketd)
import websocket
import re
import socket
class OpenIEExtractor(object):
    """Client for an OpenIE process exposed over a web socket.

    One sentence is sent per request. The server echoes the sentence back,
    then emits one extraction per line, and finishes with an empty line.

    Args:
        hostname: Host name of the web socket server.
        port: Port of the web socket server.
        verbose: When true, every line read via ``_get_response`` is printed.
    """

    # An empty line terminates the list of extractions for one sentence.
    endtoken = ''
    # Banner line; ``_connect`` waits until it has been received twice.
    greeting = '* * * * * * * * * * * * *'

    # Compiled once at class creation instead of once per response line.
    # Extraction that carries a context prefix:
    #   <confidence> Context(<context>List([a, b))):(<arg>; <rel>; <arg>)
    _CONTEXT_RE = re.compile(
        r'(0\.[0-9]+)\s+Context\((.+)List\(\[[0-9]+\, [0-9]+\)\)\):\((.+)\)')
    # Plain extraction:  <confidence> (<arg>; <rel>; <arg>)
    _PLAIN_RE = re.compile(r'(0\.[0-9]+)\s+\((.+)\)')

    def __init__(self, hostname, port, verbose=False):
        self.verbose = verbose
        self._connect(
            'ws://{hostname}:{port}'.format(hostname=hostname, port=port))

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def close(self):
        """Close the underlying web socket connection."""
        self.websocket.close()

    def parse(self, sentence):
        """Send ``sentence`` to the server and return its extractions.

        Args:
            sentence: The text to run extraction on.

        Returns:
            A list of ``(confidence, relation, context)`` tuples, where
            ``confidence`` is the score as a string, ``relation`` is a tuple
            of the ``'; '``-separated parts of the extraction, and
            ``context`` is the context text or ``None`` when absent.

        Raises:
            AssertionError: If the server does not echo the sentence back.
            ValueError: If the sentence is rejected as a payload, or a
                response line matches neither known extraction format.
        """
        self._send(sentence)

        # The first line of the response should be the request payload.
        # Read it outside of an ``assert`` so the line is still consumed
        # (and checked) when Python runs with -O.
        echoed = self.websocket.recv()
        if echoed != sentence:
            raise AssertionError(
                'Expected echo of {0!r}, got {1!r}'.format(sentence, echoed))

        # Drain the complete response before parsing anything, so that a
        # malformed line cannot leave unread lines behind on the socket.
        lines = list(iter(self._get_response, self.endtoken))
        return [self._parse_line(line) for line in lines]

    def _parse_line(self, line):
        """Convert one response line to ``(confidence, relation, context)``."""
        # TODO: handle more complex responses (e.g. n-ary extractions).
        match = self._CONTEXT_RE.match(line)
        if match is not None:
            confidence, context, relation = match.groups()
        else:
            match = self._PLAIN_RE.match(line)
            if match is None:
                raise ValueError(
                    'Unrecognized OpenIE response: {0!r}'.format(line))
            confidence, relation = match.groups()
            context = None
        return confidence, tuple(relation.split('; ')), context

    def _get_response(self):
        """Read one message from the socket, printing it when verbose."""
        # TODO: Should attempt to reestablish connection, max N times, on
        # websocket.WebSocketConnectionClosedException,
        # websocket.WebSocketTimeoutException and socket.error. Until then
        # those errors simply propagate to the caller.
        response = self.websocket.recv()
        if self.verbose:
            print(response)
        return response

    def _send(self, payload):
        """Send ``payload``; raise ``ValueError`` if it is not a valid payload."""
        try:
            self.websocket.send(payload)
        except websocket.WebSocketPayloadException:
            raise ValueError('Invalid sentence')

    def _connect(self, url):
        """Open the socket and block until the startup banner has passed."""
        # ``create_connection`` lives in the ``websocket`` module; the bare
        # name was never imported.
        self.websocket = websocket.create_connection(url)

        # The greeting line is expected twice before the server is ready.
        greetings_seen = 0
        while greetings_seen < 2:
            if self._get_response() == self.greeting:
                greetings_seen += 1

TODO

  • Retry connection some number of times;
  • Handle more complex responses.

Create a new connection

>>> parser = OpenIEExtractor('cedar.dhcp.asu.edu', 7000, verbose=True)
Loading feature templates.
Loading models.
Loading lexica.
Loading configuration.
Loading feature templates.
Loading models.
Loading feature templates.
Loading models.
Loading lexica.
Loading feature templates.
Loading models.
Loading feature templates.
Loading models.
Loading lexica.
* * * * * * * * * * * * *
* OpenIE 4.1.x is ready *
* * * * * * * * * * * * *

Parse a sentence

>>> parser.parse('Pinyon is the cutest dog in the world, and Scout is also nice')
[('0.94', ('Pinyon', 'is', 'the cutest dog in the world'), None),
 ('0.80', ('Scout', 'is also', 'nice'), None),
 ('0.89', ('Pinyon', 'is the cutest dog in', 'the world'), None)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment