Skip to content

Instantly share code, notes, and snippets.

@kingjr
Created January 30, 2023 13:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kingjr/700eb43eabc2a020ffb7c76c379ec741 to your computer and use it in GitHub Desktop.
Save kingjr/700eb43eabc2a020ffb7c76c379ec741 to your computer and use it in GitHub Desktop.
# Requirements (install before running):
#   pip install benepar
#   pip install protobuf==3.20.0
import spacy
import numpy as np
class Parser():
    """Base class wrapping a spaCy pipeline that handles one sentence at a time.

    Parameters
    ----------
    model : str
        Name of the spaCy model package to load. When the package is not
        installed it is fetched with ``spacy.cli.download`` first.
        Defaults to the small French model.
    """

    def __init__(self, model='fr_core_news_sm'):
        if not spacy.util.is_package(model):
            spacy.cli.download(model)
        self.nlp = spacy.load(model)

    def process(self, sentence):
        """Run the pipeline on ``sentence`` and return its only sentence.

        Parameters
        ----------
        sentence : str
            Text expected to be segmented into exactly one sentence.

        Returns
        -------
        The single element of ``doc.sents`` produced by the pipeline.

        Raises
        ------
        AssertionError
            If the pipeline yields zero or several sentences.
        """
        # Materialise the sentence iterator once instead of once per use.
        sents = list(self.nlp(sentence).sents)
        # Raised explicitly rather than via ``assert`` so the check still
        # runs under ``python -O``; the exception type is kept unchanged.
        if len(sents) != 1:
            raise AssertionError(
                f"expected exactly one sentence, got {len(sents)}: "
                f"{sentence!r}"
            )
        return sents[0]
class DependencyParser(Parser):
    """Pair each token with a count derived from its dependency subtrees."""

    def parse(self, sentence):
        """Return ``(token, closing)`` pairs for the single input sentence.

        For every position from 1 to ``len(sent)`` the number of "closed"
        tokens (see ``_is_closed``) is tallied. ``closing`` holds the
        increase of that tally from one position to the next, followed by
        the tally at the last position.
        """
        tokens = self.process(sentence)
        # NOTE(review): positions run 1..len(tokens) while spaCy's Token.i
        # is presumably a 0-based document index -- confirm the alignment.
        totals = [
            sum(self._is_closed(token, upto) for token in tokens)
            for upto in range(1, len(tokens) + 1)
        ]
        closing = np.r_[np.diff(totals), totals[-1]]
        return list(zip(tokens, closing))

    def _is_closed(self, node, position):
        """Tell whether ``node`` and all its descendants sit at or before ``position``.

        A node is closed when its own index does not exceed ``position``
        and every one of its children is itself closed.
        """
        return node.i <= position and all(
            self._is_closed(child, position) for child in node.children
        )
class ConstituentParser(Parser):
    """Constituency parser adding benepar's ``benepar_fr2`` model to the pipeline."""

    def __init__(self):
        import os
        # Set before importing benepar: protobuf reads this variable when it
        # is first imported, so setting it afterwards may have no effect.
        os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
        import benepar
        super().__init__()
        ben_model = 'benepar_fr2'
        benepar.download(ben_model)
        # spaCy 2 takes the component object; spaCy 3 and later only accept
        # the registered factory name plus a config (see the benepar README).
        if spacy.__version__.startswith('2'):
            self.nlp.add_pipe(benepar.BeneparComponent(ben_model))
        else:
            self.nlp.add_pipe('benepar', config={'model': ben_model})

    def parse(self, sentence):
        """Return ``(word, n_closing)`` pairs for the single input sentence.

        ``n_closing`` is the number of ``)`` characters that directly follow
        the word in the bracketed parse string exposed by benepar as
        ``sent._.parse_string``.
        """
        sent = self.process(sentence)
        return self._closing_counts(sent._.parse_string)

    @staticmethod
    def _closing_counts(parse_string):
        """Extract ``(word, n_closing)`` pairs from a bracketed parse string.

        For example ``"(NP (DET les) (NC chats))"`` gives
        ``[('les', 1), ('chats', 2)]``.
        """
        labels = []
        for chunk in parse_string.split('('):
            fields = chunk.split()
            # A chunk with fewer than two fields is empty or a bare phrase
            # label such as "NP"; only "TAG word)..." chunks carry a word.
            if len(fields) > 1:
                leaf = fields[1]
                labels.append((leaf.strip(')'), leaf.count(')')))
        return labels
# Demo: run both parsers on one French sentence and print each result.
sentence = "les petits chats de Mamie suivent la souris verte."
const_parser = ConstituentParser()
dep_parser = DependencyParser()
for _parser in (const_parser, dep_parser):
    print(_parser.parse(sentence))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment